1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGOpenMPRuntime.h" 15 #include "CodeGenFunction.h" 16 #include "CGCleanup.h" 17 #include "clang/AST/Decl.h" 18 #include "clang/AST/StmtOpenMP.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/IR/CallSite.h" 21 #include "llvm/IR/DerivedTypes.h" 22 #include "llvm/IR/GlobalValue.h" 23 #include "llvm/IR/Value.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include <cassert> 26 27 using namespace clang; 28 using namespace CodeGen; 29 30 namespace { 31 /// \brief Base class for handling code generation inside OpenMP regions. 32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 33 public: 34 CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS) 35 : CGCapturedStmtInfo(CS, CR_OpenMP), Directive(D) {} 36 37 CGOpenMPRegionInfo(const OMPExecutableDirective &D) 38 : CGCapturedStmtInfo(CR_OpenMP), Directive(D) {} 39 40 /// \brief Get a variable or parameter for storing global thread id 41 /// inside OpenMP construct. 42 virtual const VarDecl *getThreadIDVariable() const = 0; 43 44 /// \brief Get an LValue for the current ThreadID variable. 45 /// \return LValue for thread id variable. This LValue always has type int32*. 46 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 47 48 /// \brief Emit the captured statement body. 49 virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 50 51 static bool classof(const CGCapturedStmtInfo *Info) { 52 return Info->getKind() == CR_OpenMP; 53 } 54 protected: 55 /// \brief OpenMP executable directive associated with the region. 56 const OMPExecutableDirective &Directive; 57 }; 58 59 /// \brief API for captured statement code generation in OpenMP constructs. 60 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { 61 public: 62 CGOpenMPOutlinedRegionInfo(const OMPExecutableDirective &D, 63 const CapturedStmt &CS, const VarDecl *ThreadIDVar) 64 : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar) { 65 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 66 } 67 /// \brief Get a variable or parameter for storing global thread id 68 /// inside OpenMP construct. 69 virtual const VarDecl *getThreadIDVariable() const override { 70 return ThreadIDVar; 71 } 72 /// \brief Get the name of the capture helper. 73 StringRef getHelperName() const override { return ".omp_outlined."; } 74 75 private: 76 /// \brief A variable or parameter storing global thread id for OpenMP 77 /// constructs. 78 const VarDecl *ThreadIDVar; 79 }; 80 81 /// \brief API for captured statement code generation in OpenMP constructs. 82 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { 83 public: 84 CGOpenMPTaskOutlinedRegionInfo(const OMPExecutableDirective &D, 85 const CapturedStmt &CS, 86 const VarDecl *ThreadIDVar, 87 const VarDecl *PartIDVar) 88 : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar), 89 PartIDVar(PartIDVar) { 90 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 91 } 92 /// \brief Get a variable or parameter for storing global thread id 93 /// inside OpenMP construct. 94 virtual const VarDecl *getThreadIDVariable() const override { 95 return ThreadIDVar; 96 } 97 98 /// \brief Get an LValue for the current ThreadID variable. 99 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 100 101 /// \brief Emit the captured statement body. 102 virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 103 104 /// \brief Get the name of the capture helper. 105 StringRef getHelperName() const override { return ".omp_outlined."; } 106 107 private: 108 /// \brief A variable or parameter storing global thread id for OpenMP 109 /// constructs. 110 const VarDecl *ThreadIDVar; 111 /// \brief A variable or parameter storing part id for OpenMP tasking 112 /// constructs. 113 const VarDecl *PartIDVar; 114 }; 115 116 /// \brief API for inlined captured statement code generation in OpenMP 117 /// constructs. 118 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 119 public: 120 CGOpenMPInlinedRegionInfo(const OMPExecutableDirective &D, 121 CodeGenFunction::CGCapturedStmtInfo *OldCSI) 122 : CGOpenMPRegionInfo(D), OldCSI(OldCSI), 123 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 124 // \brief Retrieve the value of the context parameter. 125 virtual llvm::Value *getContextValue() const override { 126 if (OuterRegionInfo) 127 return OuterRegionInfo->getContextValue(); 128 llvm_unreachable("No context value for inlined OpenMP region"); 129 } 130 /// \brief Lookup the captured field decl for a variable. 131 virtual const FieldDecl *lookup(const VarDecl *VD) const override { 132 if (OuterRegionInfo) 133 return OuterRegionInfo->lookup(VD); 134 llvm_unreachable("Trying to reference VarDecl that is neither local nor " 135 "captured in outer OpenMP region"); 136 } 137 virtual FieldDecl *getThisFieldDecl() const override { 138 if (OuterRegionInfo) 139 return OuterRegionInfo->getThisFieldDecl(); 140 return nullptr; 141 } 142 /// \brief Get a variable or parameter for storing global thread id 143 /// inside OpenMP construct. 144 virtual const VarDecl *getThreadIDVariable() const override { 145 if (OuterRegionInfo) 146 return OuterRegionInfo->getThreadIDVariable(); 147 return nullptr; 148 } 149 150 /// \brief Get the name of the capture helper. 151 virtual StringRef getHelperName() const override { 152 llvm_unreachable("No helper name for inlined OpenMP construct"); 153 } 154 155 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 156 157 private: 158 /// \brief CodeGen info about outer OpenMP region. 159 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 160 CGOpenMPRegionInfo *OuterRegionInfo; 161 }; 162 } // namespace 163 164 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 165 return CGF.MakeNaturalAlignAddrLValue( 166 CGF.Builder.CreateAlignedLoad( 167 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 168 CGF.PointerAlignInBytes), 169 getThreadIDVariable() 170 ->getType() 171 ->castAs<PointerType>() 172 ->getPointeeType()); 173 } 174 175 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { 176 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 177 CGF.EmitOMPPrivateClause(Directive, PrivateScope); 178 CGF.EmitOMPFirstprivateClause(Directive, PrivateScope); 179 if (PrivateScope.Privatize()) 180 // Emit implicit barrier to synchronize threads and avoid data races. 181 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Directive.getLocStart(), 182 /*IsExplicit=*/false); 183 CGCapturedStmtInfo::EmitBody(CGF, S); 184 } 185 186 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 187 CodeGenFunction &CGF) { 188 return CGF.MakeNaturalAlignAddrLValue( 189 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 190 getThreadIDVariable()->getType()); 191 } 192 193 void CGOpenMPTaskOutlinedRegionInfo::EmitBody(CodeGenFunction &CGF, 194 const Stmt *S) { 195 if (PartIDVar) { 196 // TODO: emit code for untied tasks. 197 } 198 CGCapturedStmtInfo::EmitBody(CGF, S); 199 } 200 201 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 202 : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) { 203 IdentTy = llvm::StructType::create( 204 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 205 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 206 CGM.Int8PtrTy /* psource */, nullptr); 207 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 208 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 209 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 210 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 211 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 212 } 213 214 void CGOpenMPRuntime::clear() { 215 InternalVars.clear(); 216 } 217 218 llvm::Value * 219 CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D, 220 const VarDecl *ThreadIDVar) { 221 assert(ThreadIDVar->getType()->isPointerType() && 222 "thread id variable must be of type kmp_int32 *"); 223 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 224 CodeGenFunction CGF(CGM, true); 225 CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar); 226 CGF.CapturedStmtInfo = &CGInfo; 227 return CGF.GenerateCapturedStmtFunction(*CS); 228 } 229 230 llvm::Value * 231 CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D, 232 const VarDecl *ThreadIDVar, 233 const VarDecl *PartIDVar) { 234 assert(!ThreadIDVar->getType()->isPointerType() && 235 "thread id variable must be of type kmp_int32 for tasks"); 236 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 237 CodeGenFunction CGF(CGM, true); 238 CGOpenMPTaskOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar, PartIDVar); 239 CGF.CapturedStmtInfo = &CGInfo; 240 return CGF.GenerateCapturedStmtFunction(*CS); 241 } 242 243 llvm::Value * 244 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { 245 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 246 if (!Entry) { 247 if (!DefaultOpenMPPSource) { 248 // Initialize default location for psource field of ident_t structure of 249 // all ident_t objects. Format is ";file;function;line;column;;". 250 // Taken from 251 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 252 DefaultOpenMPPSource = 253 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;"); 254 DefaultOpenMPPSource = 255 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 256 } 257 auto DefaultOpenMPLocation = new llvm::GlobalVariable( 258 CGM.getModule(), IdentTy, /*isConstant*/ true, 259 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); 260 DefaultOpenMPLocation->setUnnamedAddr(true); 261 262 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); 263 llvm::Constant *Values[] = {Zero, 264 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 265 Zero, Zero, DefaultOpenMPPSource}; 266 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); 267 DefaultOpenMPLocation->setInitializer(Init); 268 OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation; 269 return DefaultOpenMPLocation; 270 } 271 return Entry; 272 } 273 274 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 275 SourceLocation Loc, 276 OpenMPLocationFlags Flags) { 277 // If no debug info is generated - return global default location. 278 if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || 279 Loc.isInvalid()) 280 return getOrCreateDefaultLocation(Flags); 281 282 assert(CGF.CurFn && "No function in current CodeGenFunction."); 283 284 llvm::Value *LocValue = nullptr; 285 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 286 if (I != OpenMPLocThreadIDMap.end()) 287 LocValue = I->second.DebugLoc; 288 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 289 // GetOpenMPThreadID was called before this routine. 290 if (LocValue == nullptr) { 291 // Generate "ident_t .kmpc_loc.addr;" 292 llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr"); 293 AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy)); 294 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 295 Elem.second.DebugLoc = AI; 296 LocValue = AI; 297 298 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 299 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 300 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 301 llvm::ConstantExpr::getSizeOf(IdentTy), 302 CGM.PointerAlignInBytes); 303 } 304 305 // char **psource = &.kmpc_loc_<flags>.addr.psource; 306 auto *PSource = 307 CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource); 308 309 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 310 if (OMPDebugLoc == nullptr) { 311 SmallString<128> Buffer2; 312 llvm::raw_svector_ostream OS2(Buffer2); 313 // Build debug location 314 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 315 OS2 << ";" << PLoc.getFilename() << ";"; 316 if (const FunctionDecl *FD = 317 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 318 OS2 << FD->getQualifiedNameAsString(); 319 } 320 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 321 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 322 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 323 } 324 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 325 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 326 327 return LocValue; 328 } 329 330 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 331 SourceLocation Loc) { 332 assert(CGF.CurFn && "No function in current CodeGenFunction."); 333 334 llvm::Value *ThreadID = nullptr; 335 // Check whether we've already cached a load of the thread id in this 336 // function. 337 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 338 if (I != OpenMPLocThreadIDMap.end()) { 339 ThreadID = I->second.ThreadID; 340 if (ThreadID != nullptr) 341 return ThreadID; 342 } 343 if (auto OMPRegionInfo = 344 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 345 if (OMPRegionInfo->getThreadIDVariable()) { 346 // Check if this an outlined function with thread id passed as argument. 347 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 348 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 349 // If value loaded in entry block, cache it and use it everywhere in 350 // function. 351 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 352 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 353 Elem.second.ThreadID = ThreadID; 354 } 355 return ThreadID; 356 } 357 } 358 359 // This is not an outlined function region - need to call __kmpc_int32 360 // kmpc_global_thread_num(ident_t *loc). 361 // Generate thread id value and cache this value for use across the 362 // function. 363 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 364 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 365 ThreadID = 366 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 367 emitUpdateLocation(CGF, Loc)); 368 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 369 Elem.second.ThreadID = ThreadID; 370 return ThreadID; 371 } 372 373 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 374 assert(CGF.CurFn && "No function in current CodeGenFunction."); 375 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 376 OpenMPLocThreadIDMap.erase(CGF.CurFn); 377 } 378 379 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 380 return llvm::PointerType::getUnqual(IdentTy); 381 } 382 383 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 384 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 385 } 386 387 llvm::Constant * 388 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { 389 llvm::Constant *RTLFn = nullptr; 390 switch (Function) { 391 case OMPRTL__kmpc_fork_call: { 392 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 393 // microtask, ...); 394 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 395 getKmpc_MicroPointerTy()}; 396 llvm::FunctionType *FnTy = 397 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 398 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 399 break; 400 } 401 case OMPRTL__kmpc_global_thread_num: { 402 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 403 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 404 llvm::FunctionType *FnTy = 405 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 406 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 407 break; 408 } 409 case OMPRTL__kmpc_threadprivate_cached: { 410 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 411 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 412 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 413 CGM.VoidPtrTy, CGM.SizeTy, 414 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 415 llvm::FunctionType *FnTy = 416 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 417 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 418 break; 419 } 420 case OMPRTL__kmpc_critical: { 421 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 422 // kmp_critical_name *crit); 423 llvm::Type *TypeParams[] = { 424 getIdentTyPointerTy(), CGM.Int32Ty, 425 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 426 llvm::FunctionType *FnTy = 427 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 428 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 429 break; 430 } 431 case OMPRTL__kmpc_threadprivate_register: { 432 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 433 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 434 // typedef void *(*kmpc_ctor)(void *); 435 auto KmpcCtorTy = 436 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 437 /*isVarArg*/ false)->getPointerTo(); 438 // typedef void *(*kmpc_cctor)(void *, void *); 439 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 440 auto KmpcCopyCtorTy = 441 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 442 /*isVarArg*/ false)->getPointerTo(); 443 // typedef void (*kmpc_dtor)(void *); 444 auto KmpcDtorTy = 445 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 446 ->getPointerTo(); 447 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 448 KmpcCopyCtorTy, KmpcDtorTy}; 449 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 450 /*isVarArg*/ false); 451 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 452 break; 453 } 454 case OMPRTL__kmpc_end_critical: { 455 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 456 // kmp_critical_name *crit); 457 llvm::Type *TypeParams[] = { 458 getIdentTyPointerTy(), CGM.Int32Ty, 459 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 460 llvm::FunctionType *FnTy = 461 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 462 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 463 break; 464 } 465 case OMPRTL__kmpc_cancel_barrier: { 466 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 467 // global_tid); 468 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 469 llvm::FunctionType *FnTy = 470 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 471 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 472 break; 473 } 474 case OMPRTL__kmpc_for_static_fini: { 475 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 476 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 477 llvm::FunctionType *FnTy = 478 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 479 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 480 break; 481 } 482 case OMPRTL__kmpc_push_num_threads: { 483 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 484 // kmp_int32 num_threads) 485 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 486 CGM.Int32Ty}; 487 llvm::FunctionType *FnTy = 488 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 489 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 490 break; 491 } 492 case OMPRTL__kmpc_serialized_parallel: { 493 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 494 // global_tid); 495 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 496 llvm::FunctionType *FnTy = 497 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 498 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 499 break; 500 } 501 case OMPRTL__kmpc_end_serialized_parallel: { 502 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 503 // global_tid); 504 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 505 llvm::FunctionType *FnTy = 506 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 507 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 508 break; 509 } 510 case OMPRTL__kmpc_flush: { 511 // Build void __kmpc_flush(ident_t *loc); 512 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 513 llvm::FunctionType *FnTy = 514 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 515 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 516 break; 517 } 518 case OMPRTL__kmpc_master: { 519 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 520 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 521 llvm::FunctionType *FnTy = 522 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 523 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 524 break; 525 } 526 case OMPRTL__kmpc_end_master: { 527 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 528 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 529 llvm::FunctionType *FnTy = 530 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 531 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 532 break; 533 } 534 case OMPRTL__kmpc_omp_taskyield: { 535 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 536 // int end_part); 537 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 538 llvm::FunctionType *FnTy = 539 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 540 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 541 break; 542 } 543 case OMPRTL__kmpc_single: { 544 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 545 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 546 llvm::FunctionType *FnTy = 547 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 548 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 549 break; 550 } 551 case OMPRTL__kmpc_end_single: { 552 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 553 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 554 llvm::FunctionType *FnTy = 555 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 556 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 557 break; 558 } 559 case OMPRTL__kmpc_omp_task_alloc: { 560 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 561 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 562 // kmp_routine_entry_t *task_entry); 563 assert(KmpRoutineEntryPtrTy != nullptr && 564 "Type kmp_routine_entry_t must be created."); 565 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 566 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 567 // Return void * and then cast to particular kmp_task_t type. 568 llvm::FunctionType *FnTy = 569 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 570 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 571 break; 572 } 573 case OMPRTL__kmpc_omp_task: { 574 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 575 // *new_task); 576 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 577 CGM.VoidPtrTy}; 578 llvm::FunctionType *FnTy = 579 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 580 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 581 break; 582 } 583 case OMPRTL__kmpc_copyprivate: { 584 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 585 // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 586 // kmp_int32 didit); 587 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 588 auto *CpyFnTy = 589 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 590 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 591 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 592 CGM.Int32Ty}; 593 llvm::FunctionType *FnTy = 594 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 595 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 596 break; 597 } 598 } 599 return RTLFn; 600 } 601 602 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, 603 bool IVSigned) { 604 assert((IVSize == 32 || IVSize == 64) && 605 "IV size is not compatible with the omp runtime"); 606 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 607 : "__kmpc_for_static_init_4u") 608 : (IVSigned ? "__kmpc_for_static_init_8" 609 : "__kmpc_for_static_init_8u"); 610 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 611 auto PtrTy = llvm::PointerType::getUnqual(ITy); 612 llvm::Type *TypeParams[] = { 613 getIdentTyPointerTy(), // loc 614 CGM.Int32Ty, // tid 615 CGM.Int32Ty, // schedtype 616 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 617 PtrTy, // p_lower 618 PtrTy, // p_upper 619 PtrTy, // p_stride 620 ITy, // incr 621 ITy // chunk 622 }; 623 llvm::FunctionType *FnTy = 624 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 625 return CGM.CreateRuntimeFunction(FnTy, Name); 626 } 627 628 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, 629 bool IVSigned) { 630 assert((IVSize == 32 || IVSize == 64) && 631 "IV size is not compatible with the omp runtime"); 632 auto Name = 633 IVSize == 32 634 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 635 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 636 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 637 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 638 CGM.Int32Ty, // tid 639 CGM.Int32Ty, // schedtype 640 ITy, // lower 641 ITy, // upper 642 ITy, // stride 643 ITy // chunk 644 }; 645 llvm::FunctionType *FnTy = 646 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 647 return CGM.CreateRuntimeFunction(FnTy, Name); 648 } 649 650 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, 651 bool IVSigned) { 652 assert((IVSize == 32 || IVSize == 64) && 653 "IV size is not compatible with the omp runtime"); 654 auto Name = 655 IVSize == 32 656 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 657 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 658 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 659 auto PtrTy = llvm::PointerType::getUnqual(ITy); 660 llvm::Type *TypeParams[] = { 661 getIdentTyPointerTy(), // loc 662 CGM.Int32Ty, // tid 663 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 664 PtrTy, // p_lower 665 PtrTy, // p_upper 666 PtrTy // p_stride 667 }; 668 llvm::FunctionType *FnTy = 669 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 670 return CGM.CreateRuntimeFunction(FnTy, Name); 671 } 672 673 llvm::Constant * 674 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 675 // Lookup the entry, lazily creating it if necessary. 676 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 677 Twine(CGM.getMangledName(VD)) + ".cache."); 678 } 679 680 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 681 const VarDecl *VD, 682 llvm::Value *VDAddr, 683 SourceLocation Loc) { 684 auto VarTy = VDAddr->getType()->getPointerElementType(); 685 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 686 CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy), 687 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 688 getOrCreateThreadPrivateCache(VD)}; 689 return CGF.EmitRuntimeCall( 690 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args); 691 } 692 693 void CGOpenMPRuntime::emitThreadPrivateVarInit( 694 CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor, 695 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 696 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 697 // library. 698 auto OMPLoc = emitUpdateLocation(CGF, Loc); 699 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 700 OMPLoc); 701 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 702 // to register constructor/destructor for variable. 703 llvm::Value *Args[] = {OMPLoc, 704 CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy), 705 Ctor, CopyCtor, Dtor}; 706 CGF.EmitRuntimeCall( 707 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 708 } 709 710 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 711 const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc, 712 bool PerformInit, CodeGenFunction *CGF) { 713 VD = VD->getDefinition(CGM.getContext()); 714 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 715 ThreadPrivateWithDefinition.insert(VD); 716 QualType ASTTy = VD->getType(); 717 718 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 719 auto Init = VD->getAnyInitializer(); 720 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 721 // Generate function that re-emits the declaration's initializer into the 722 // threadprivate copy of the variable VD 723 CodeGenFunction CtorCGF(CGM); 724 FunctionArgList Args; 725 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 726 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 727 Args.push_back(&Dst); 728 729 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 730 CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), 731 /*isVariadic=*/false); 732 auto FTy = CGM.getTypes().GetFunctionType(FI); 733 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 734 FTy, ".__kmpc_global_ctor_.", Loc); 735 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 736 Args, SourceLocation()); 737 auto ArgVal = CtorCGF.EmitLoadOfScalar( 738 CtorCGF.GetAddrOfLocalVar(&Dst), 739 /*Volatile=*/false, CGM.PointerAlignInBytes, 740 CGM.getContext().VoidPtrTy, Dst.getLocation()); 741 auto Arg = CtorCGF.Builder.CreatePointerCast( 742 ArgVal, 743 CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy))); 744 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 745 /*IsInitializer=*/true); 746 ArgVal = CtorCGF.EmitLoadOfScalar( 747 CtorCGF.GetAddrOfLocalVar(&Dst), 748 /*Volatile=*/false, CGM.PointerAlignInBytes, 749 CGM.getContext().VoidPtrTy, Dst.getLocation()); 750 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 751 CtorCGF.FinishFunction(); 752 Ctor = Fn; 753 } 754 if (VD->getType().isDestructedType() != QualType::DK_none) { 755 // Generate function that emits destructor call for the threadprivate copy 756 // of the variable VD 757 CodeGenFunction DtorCGF(CGM); 758 FunctionArgList Args; 759 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 760 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 761 Args.push_back(&Dst); 762 763 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 764 CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), 765 /*isVariadic=*/false); 766 auto FTy = CGM.getTypes().GetFunctionType(FI); 767 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 768 FTy, ".__kmpc_global_dtor_.", Loc); 769 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 770 SourceLocation()); 771 auto ArgVal = DtorCGF.EmitLoadOfScalar( 772 DtorCGF.GetAddrOfLocalVar(&Dst), 773 /*Volatile=*/false, CGM.PointerAlignInBytes, 774 CGM.getContext().VoidPtrTy, Dst.getLocation()); 775 DtorCGF.emitDestroy(ArgVal, ASTTy, 776 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 777 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 778 DtorCGF.FinishFunction(); 779 Dtor = Fn; 780 } 781 // Do not emit init function if it is not required. 782 if (!Ctor && !Dtor) 783 return nullptr; 784 785 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 786 auto CopyCtorTy = 787 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 788 /*isVarArg=*/false)->getPointerTo(); 789 // Copying constructor for the threadprivate variable. 790 // Must be NULL - reserved by runtime, but currently it requires that this 791 // parameter is always NULL. Otherwise it fires assertion. 792 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 793 if (Ctor == nullptr) { 794 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 795 /*isVarArg=*/false)->getPointerTo(); 796 Ctor = llvm::Constant::getNullValue(CtorTy); 797 } 798 if (Dtor == nullptr) { 799 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 800 /*isVarArg=*/false)->getPointerTo(); 801 Dtor = llvm::Constant::getNullValue(DtorTy); 802 } 803 if (!CGF) { 804 auto InitFunctionTy = 805 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 806 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 807 InitFunctionTy, ".__omp_threadprivate_init_."); 808 CodeGenFunction InitCGF(CGM); 809 FunctionArgList ArgList; 810 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 811 CGM.getTypes().arrangeNullaryFunction(), ArgList, 812 Loc); 813 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 814 InitCGF.FinishFunction(); 815 return InitFunction; 816 } 817 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 818 } 819 return nullptr; 820 } 821 822 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 823 llvm::Value *OutlinedFn, 824 llvm::Value *CapturedStruct) { 825 // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/) 826 llvm::Value *Args[] = { 827 emitUpdateLocation(CGF, Loc), 828 CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument 829 // (there is only one additional argument - 'context') 830 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()), 831 CGF.EmitCastToVoidPtr(CapturedStruct)}; 832 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); 833 CGF.EmitRuntimeCall(RTLFn, Args); 834 } 835 836 void CGOpenMPRuntime::emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc, 837 llvm::Value *OutlinedFn, 838 llvm::Value *CapturedStruct) { 839 auto ThreadID = getThreadID(CGF, Loc); 840 // Build calls: 841 // __kmpc_serialized_parallel(&Loc, GTid); 842 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), ThreadID}; 843 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), 844 Args); 845 846 // OutlinedFn(>id, &zero, CapturedStruct); 847 auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); 848 auto Int32Ty = 849 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 850 auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr"); 851 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 852 llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct}; 853 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 854 855 // __kmpc_end_serialized_parallel(&Loc, GTid); 856 llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID}; 857 CGF.EmitRuntimeCall( 858 createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); 859 } 860 861 // If we're inside an (outlined) parallel region, use the region info's 862 // thread-ID variable (it is passed in a first argument of the outlined function 863 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 864 // regular serial code region, get thread ID by calling kmp_int32 865 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 866 // return the address of that temp. 867 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 868 SourceLocation Loc) { 869 if (auto OMPRegionInfo = 870 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 871 if (OMPRegionInfo->getThreadIDVariable()) 872 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 873 874 auto ThreadID = getThreadID(CGF, Loc); 875 auto Int32Ty = 876 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 877 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 878 CGF.EmitStoreOfScalar(ThreadID, 879 CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty)); 880 881 return ThreadIDTemp; 882 } 883 884 llvm::Constant * 885 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 886 const llvm::Twine &Name) { 887 SmallString<256> Buffer; 888 llvm::raw_svector_ostream Out(Buffer); 889 Out << Name; 890 auto RuntimeName = Out.str(); 891 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 892 if (Elem.second) { 893 assert(Elem.second->getType()->getPointerElementType() == Ty && 894 "OMP internal variable has different type than requested"); 895 return &*Elem.second; 896 } 897 898 return Elem.second = new llvm::GlobalVariable( 899 CGM.getModule(), Ty, /*IsConstant*/ false, 900 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 901 Elem.first()); 902 } 903 904 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 905 llvm::Twine Name(".gomp_critical_user_", CriticalName); 906 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 907 } 908 909 void CGOpenMPRuntime::emitCriticalRegion( 910 CodeGenFunction &CGF, StringRef CriticalName, 911 const std::function<void()> &CriticalOpGen, SourceLocation Loc) { 912 auto RegionLock = getCriticalRegionLock(CriticalName); 913 // __kmpc_critical(ident_t *, gtid, Lock); 914 // CriticalOpGen(); 915 // __kmpc_end_critical(ident_t *, gtid, Lock); 916 // Prepare arguments and build a call to __kmpc_critical 917 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 918 RegionLock}; 919 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); 920 CriticalOpGen(); 921 // Build a call to __kmpc_end_critical 922 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 923 } 924 925 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, 926 const std::function<void()> &BodyOpGen) { 927 llvm::Value *CallBool = CGF.EmitScalarConversion( 928 IfCond, 929 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), 930 CGF.getContext().BoolTy); 931 932 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 933 auto *ContBlock = CGF.createBasicBlock("omp_if.end"); 934 // Generate the branch (If-stmt) 935 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 936 CGF.EmitBlock(ThenBlock); 937 BodyOpGen(); 938 // Emit the rest of bblocks/branches 939 CGF.EmitBranch(ContBlock); 940 CGF.EmitBlock(ContBlock, true); 941 } 942 943 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 944 const std::function<void()> &MasterOpGen, 945 SourceLocation Loc) { 946 // if(__kmpc_master(ident_t *, gtid)) { 947 // MasterOpGen(); 948 // __kmpc_end_master(ident_t *, gtid); 949 // } 950 // Prepare arguments and build a call to __kmpc_master 951 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 952 auto *IsMaster = 953 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); 954 emitIfStmt(CGF, IsMaster, [&]() -> void { 955 MasterOpGen(); 956 // Build a call to __kmpc_end_master. 957 // OpenMP [1.2.2 OpenMP Language Terminology] 958 // For C/C++, an executable statement, possibly compound, with a single 959 // entry at the top and a single exit at the bottom, or an OpenMP construct. 960 // * Access to the structured block must not be the result of a branch. 961 // * The point of exit cannot be a branch out of the structured block. 962 // * The point of entry must not be a call to setjmp(). 963 // * longjmp() and throw() must not violate the entry/exit criteria. 964 // * An expression statement, iteration statement, selection statement, or 965 // try block is considered to be a structured block if the corresponding 966 // compound statement obtained by enclosing it in { and } would be a 967 // structured block. 968 // It is analyzed in Sema, so we can just call __kmpc_end_master() on 969 // fallthrough rather than pushing a normal cleanup for it. 970 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_master), Args); 971 }); 972 } 973 974 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 975 SourceLocation Loc) { 976 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 977 llvm::Value *Args[] = { 978 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 979 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 980 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 981 } 982 983 static llvm::Value *emitCopyprivateCopyFunction( 984 CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> SrcExprs, 985 ArrayRef<const Expr *> DstExprs, ArrayRef<const Expr *> AssignmentOps) { 986 auto &C = CGM.getContext(); 987 // void copy_func(void *LHSArg, void *RHSArg); 988 FunctionArgList Args; 989 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 990 C.VoidPtrTy); 991 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, 992 C.VoidPtrTy); 993 Args.push_back(&LHSArg); 994 Args.push_back(&RHSArg); 995 FunctionType::ExtInfo EI; 996 auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( 997 C.VoidTy, Args, EI, /*isVariadic=*/false); 998 auto *Fn = llvm::Function::Create( 999 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, 1000 ".omp.copyprivate.copy_func", &CGM.getModule()); 1001 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn); 1002 CodeGenFunction CGF(CGM); 1003 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); 1004 // Dst = (void*[n])(LHSArg); 1005 // Src = (void*[n])(RHSArg); 1006 auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1007 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg), 1008 CGF.PointerAlignInBytes), 1009 ArgsType); 1010 auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1011 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg), 1012 CGF.PointerAlignInBytes), 1013 ArgsType); 1014 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 1015 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 1016 // ... 1017 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 1018 CodeGenFunction::OMPPrivateScope Scope(CGF); 1019 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 1020 Scope.addPrivate( 1021 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()), 1022 [&]() -> llvm::Value *{ 1023 return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1024 CGF.Builder.CreateAlignedLoad(CGF.Builder.CreateStructGEP(RHS, I), 1025 CGM.PointerAlignInBytes), 1026 CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); 1027 }); 1028 Scope.addPrivate( 1029 cast<VarDecl>(cast<DeclRefExpr>(DstExprs[I])->getDecl()), 1030 [&]() -> llvm::Value *{ 1031 return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1032 CGF.Builder.CreateAlignedLoad(CGF.Builder.CreateStructGEP(LHS, I), 1033 CGM.PointerAlignInBytes), 1034 CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); 1035 }); 1036 } 1037 Scope.Privatize(); 1038 for (auto *E : AssignmentOps) { 1039 CGF.EmitIgnoredExpr(E); 1040 } 1041 Scope.ForceCleanup(); 1042 CGF.FinishFunction(); 1043 return Fn; 1044 } 1045 1046 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 1047 const std::function<void()> &SingleOpGen, 1048 SourceLocation Loc, 1049 ArrayRef<const Expr *> CopyprivateVars, 1050 ArrayRef<const Expr *> SrcExprs, 1051 ArrayRef<const Expr *> DstExprs, 1052 ArrayRef<const Expr *> AssignmentOps) { 1053 assert(CopyprivateVars.size() == SrcExprs.size() && 1054 CopyprivateVars.size() == DstExprs.size() && 1055 CopyprivateVars.size() == AssignmentOps.size()); 1056 auto &C = CGM.getContext(); 1057 // int32 did_it = 0; 1058 // if(__kmpc_single(ident_t *, gtid)) { 1059 // SingleOpGen(); 1060 // __kmpc_end_single(ident_t *, gtid); 1061 // did_it = 1; 1062 // } 1063 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1064 // <copy_func>, did_it); 1065 1066 llvm::AllocaInst *DidIt = nullptr; 1067 if (!CopyprivateVars.empty()) { 1068 // int32 did_it = 0; 1069 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1070 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 1071 CGF.InitTempAlloca(DidIt, CGF.Builder.getInt32(0)); 1072 } 1073 // Prepare arguments and build a call to __kmpc_single 1074 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 1075 auto *IsSingle = 1076 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); 1077 emitIfStmt(CGF, IsSingle, [&]() -> void { 1078 SingleOpGen(); 1079 if (DidIt) { 1080 // did_it = 1; 1081 CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt, 1082 DidIt->getAlignment()); 1083 } 1084 // Build a call to __kmpc_end_single. 1085 // OpenMP [1.2.2 OpenMP Language Terminology] 1086 // For C/C++, an executable statement, possibly compound, with a single 1087 // entry at the top and a single exit at the bottom, or an OpenMP construct. 1088 // * Access to the structured block must not be the result of a branch. 1089 // * The point of exit cannot be a branch out of the structured block. 1090 // * The point of entry must not be a call to setjmp(). 1091 // * longjmp() and throw() must not violate the entry/exit criteria. 1092 // * An expression statement, iteration statement, selection statement, or 1093 // try block is considered to be a structured block if the corresponding 1094 // compound statement obtained by enclosing it in { and } would be a 1095 // structured block. 1096 // It is analyzed in Sema, so we can just call __kmpc_end_single() on 1097 // fallthrough rather than pushing a normal cleanup for it. 1098 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), Args); 1099 }); 1100 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 1101 // <copy_func>, did_it); 1102 if (DidIt) { 1103 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 1104 auto CopyprivateArrayTy = 1105 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 1106 /*IndexTypeQuals=*/0); 1107 // Create a list of all private variables for copyprivate. 1108 auto *CopyprivateList = 1109 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 1110 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 1111 auto *Elem = CGF.Builder.CreateStructGEP(CopyprivateList, I); 1112 CGF.Builder.CreateAlignedStore( 1113 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1114 CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy), 1115 Elem, CGM.PointerAlignInBytes); 1116 } 1117 // Build function that copies private values from single region to all other 1118 // threads in the corresponding parallel region. 1119 auto *CpyFn = emitCopyprivateCopyFunction( 1120 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 1121 SrcExprs, DstExprs, AssignmentOps); 1122 auto *BufSize = CGF.Builder.getInt32( 1123 C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity()); 1124 auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 1125 CGF.VoidPtrTy); 1126 auto *DidItVal = 1127 CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes); 1128 llvm::Value *Args[] = { 1129 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 1130 getThreadID(CGF, Loc), // i32 <gtid> 1131 BufSize, // i32 <buf_size> 1132 CL, // void *<copyprivate list> 1133 CpyFn, // void (*) (void *, void *) <copy_func> 1134 DidItVal // i32 did_it 1135 }; 1136 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 1137 } 1138 } 1139 1140 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 1141 bool IsExplicit) { 1142 // Build call __kmpc_cancel_barrier(loc, thread_id); 1143 auto Flags = static_cast<OpenMPLocationFlags>( 1144 OMP_IDENT_KMPC | 1145 (IsExplicit ? OMP_IDENT_BARRIER_EXPL : OMP_IDENT_BARRIER_IMPL)); 1146 // Build call __kmpc_cancel_barrier(loc, thread_id); 1147 // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this 1148 // one provides the same functionality and adds initial support for 1149 // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier() 1150 // is provided default by the runtime library so it safe to make such 1151 // replacement. 1152 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 1153 getThreadID(CGF, Loc)}; 1154 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 1155 } 1156 1157 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 1158 /// the enum sched_type in kmp.h). 1159 enum OpenMPSchedType { 1160 /// \brief Lower bound for default (unordered) versions. 1161 OMP_sch_lower = 32, 1162 OMP_sch_static_chunked = 33, 1163 OMP_sch_static = 34, 1164 OMP_sch_dynamic_chunked = 35, 1165 OMP_sch_guided_chunked = 36, 1166 OMP_sch_runtime = 37, 1167 OMP_sch_auto = 38, 1168 /// \brief Lower bound for 'ordered' versions. 1169 OMP_ord_lower = 64, 1170 /// \brief Lower bound for 'nomerge' versions. 1171 OMP_nm_lower = 160, 1172 }; 1173 1174 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 1175 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 1176 bool Chunked) { 1177 switch (ScheduleKind) { 1178 case OMPC_SCHEDULE_static: 1179 return Chunked ? OMP_sch_static_chunked : OMP_sch_static; 1180 case OMPC_SCHEDULE_dynamic: 1181 return OMP_sch_dynamic_chunked; 1182 case OMPC_SCHEDULE_guided: 1183 return OMP_sch_guided_chunked; 1184 case OMPC_SCHEDULE_auto: 1185 return OMP_sch_auto; 1186 case OMPC_SCHEDULE_runtime: 1187 return OMP_sch_runtime; 1188 case OMPC_SCHEDULE_unknown: 1189 assert(!Chunked && "chunk was specified but schedule kind not known"); 1190 return OMP_sch_static; 1191 } 1192 llvm_unreachable("Unexpected runtime schedule"); 1193 } 1194 1195 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 1196 bool Chunked) const { 1197 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 1198 return Schedule == OMP_sch_static; 1199 } 1200 1201 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 1202 auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false); 1203 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 1204 return Schedule != OMP_sch_static; 1205 } 1206 1207 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc, 1208 OpenMPScheduleClauseKind ScheduleKind, 1209 unsigned IVSize, bool IVSigned, 1210 llvm::Value *IL, llvm::Value *LB, 1211 llvm::Value *UB, llvm::Value *ST, 1212 llvm::Value *Chunk) { 1213 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr); 1214 if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) { 1215 // Call __kmpc_dispatch_init( 1216 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 1217 // kmp_int[32|64] lower, kmp_int[32|64] upper, 1218 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 1219 1220 // If the Chunk was not specified in the clause - use default value 1. 1221 if (Chunk == nullptr) 1222 Chunk = CGF.Builder.getIntN(IVSize, 1); 1223 llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1224 getThreadID(CGF, Loc), 1225 CGF.Builder.getInt32(Schedule), // Schedule type 1226 CGF.Builder.getIntN(IVSize, 0), // Lower 1227 UB, // Upper 1228 CGF.Builder.getIntN(IVSize, 1), // Stride 1229 Chunk // Chunk 1230 }; 1231 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 1232 } else { 1233 // Call __kmpc_for_static_init( 1234 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 1235 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 1236 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 1237 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 1238 if (Chunk == nullptr) { 1239 assert(Schedule == OMP_sch_static && 1240 "expected static non-chunked schedule"); 1241 // If the Chunk was not specified in the clause - use default value 1. 1242 Chunk = CGF.Builder.getIntN(IVSize, 1); 1243 } else 1244 assert(Schedule == OMP_sch_static_chunked && 1245 "expected static chunked schedule"); 1246 llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1247 getThreadID(CGF, Loc), 1248 CGF.Builder.getInt32(Schedule), // Schedule type 1249 IL, // &isLastIter 1250 LB, // &LB 1251 UB, // &UB 1252 ST, // &Stride 1253 CGF.Builder.getIntN(IVSize, 1), // Incr 1254 Chunk // Chunk 1255 }; 1256 CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args); 1257 } 1258 } 1259 1260 void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc, 1261 OpenMPScheduleClauseKind ScheduleKind) { 1262 assert((ScheduleKind == OMPC_SCHEDULE_static || 1263 ScheduleKind == OMPC_SCHEDULE_unknown) && 1264 "Non-static schedule kinds are not yet implemented"); 1265 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 1266 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1267 getThreadID(CGF, Loc)}; 1268 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 1269 Args); 1270 } 1271 1272 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 1273 SourceLocation Loc, unsigned IVSize, 1274 bool IVSigned, llvm::Value *IL, 1275 llvm::Value *LB, llvm::Value *UB, 1276 llvm::Value *ST) { 1277 // Call __kmpc_dispatch_next( 1278 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 1279 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 1280 // kmp_int[32|64] *p_stride); 1281 llvm::Value *Args[] = { 1282 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc), 1283 IL, // &isLastIter 1284 LB, // &Lower 1285 UB, // &Upper 1286 ST // &Stride 1287 }; 1288 llvm::Value *Call = 1289 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 1290 return CGF.EmitScalarConversion( 1291 Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), 1292 CGF.getContext().BoolTy); 1293 } 1294 1295 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 1296 llvm::Value *NumThreads, 1297 SourceLocation Loc) { 1298 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 1299 llvm::Value *Args[] = { 1300 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1301 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 1302 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 1303 Args); 1304 } 1305 1306 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 1307 SourceLocation Loc) { 1308 // Build call void __kmpc_flush(ident_t *loc) 1309 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 1310 emitUpdateLocation(CGF, Loc)); 1311 } 1312 1313 namespace { 1314 /// \brief Indexes of fields for type kmp_task_t. 1315 enum KmpTaskTFields { 1316 /// \brief List of shared variables. 1317 KmpTaskTShareds, 1318 /// \brief Task routine. 1319 KmpTaskTRoutine, 1320 /// \brief Partition id for the untied tasks. 1321 KmpTaskTPartId, 1322 /// \brief Function with call of destructors for private variables. 1323 KmpTaskTDestructors, 1324 }; 1325 } // namespace 1326 1327 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 1328 if (!KmpRoutineEntryPtrTy) { 1329 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 1330 auto &C = CGM.getContext(); 1331 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 1332 FunctionProtoType::ExtProtoInfo EPI; 1333 KmpRoutineEntryPtrQTy = C.getPointerType( 1334 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 1335 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 1336 } 1337 } 1338 1339 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1340 QualType FieldTy) { 1341 auto *Field = FieldDecl::Create( 1342 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1343 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1344 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1345 Field->setAccess(AS_public); 1346 DC->addDecl(Field); 1347 } 1348 1349 static QualType createKmpTaskTRecordDecl(CodeGenModule &CGM, 1350 QualType KmpInt32Ty, 1351 QualType KmpRoutineEntryPointerQTy) { 1352 auto &C = CGM.getContext(); 1353 // Build struct kmp_task_t { 1354 // void * shareds; 1355 // kmp_routine_entry_t routine; 1356 // kmp_int32 part_id; 1357 // kmp_routine_entry_t destructors; 1358 // /* private vars */ 1359 // }; 1360 auto *RD = C.buildImplicitRecord("kmp_task_t"); 1361 RD->startDefinition(); 1362 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1363 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1364 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1365 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 1366 // TODO: add private fields. 1367 RD->completeDefinition(); 1368 return C.getRecordType(RD); 1369 } 1370 1371 /// \brief Emit a proxy function which accepts kmp_task_t as the second 1372 /// argument. 1373 /// \code 1374 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 1375 /// TaskFunction(gtid, tt->part_id, tt->shareds); 1376 /// return 0; 1377 /// } 1378 /// \endcode 1379 static llvm::Value * 1380 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 1381 QualType KmpInt32Ty, QualType KmpTaskTPtrQTy, 1382 QualType SharedsPtrTy, llvm::Value *TaskFunction) { 1383 auto &C = CGM.getContext(); 1384 FunctionArgList Args; 1385 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); 1386 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, 1387 /*Id=*/nullptr, KmpTaskTPtrQTy); 1388 Args.push_back(&GtidArg); 1389 Args.push_back(&TaskTypeArg); 1390 FunctionType::ExtInfo Info; 1391 auto &TaskEntryFnInfo = 1392 CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, 1393 /*isVariadic=*/false); 1394 auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 1395 auto *TaskEntry = 1396 llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, 1397 ".omp_task_entry.", &CGM.getModule()); 1398 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry); 1399 CodeGenFunction CGF(CGM); 1400 CGF.disableDebugInfo(); 1401 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); 1402 1403 // TaskFunction(gtid, tt->part_id, tt->shareds); 1404 auto *GtidParam = CGF.EmitLoadOfScalar( 1405 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, 1406 C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc); 1407 auto TaskTypeArgAddr = CGF.EmitLoadOfScalar( 1408 CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false, 1409 CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc); 1410 auto *PartidPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr, 1411 /*Idx=*/KmpTaskTPartId); 1412 auto *PartidParam = CGF.EmitLoadOfScalar( 1413 PartidPtr, /*Volatile=*/false, 1414 C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc); 1415 auto *SharedsPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr, 1416 /*Idx=*/KmpTaskTShareds); 1417 auto *SharedsParam = 1418 CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false, 1419 CGM.PointerAlignInBytes, C.VoidPtrTy, Loc); 1420 llvm::Value *CallArgs[] = { 1421 GtidParam, PartidParam, 1422 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1423 SharedsParam, CGF.ConvertTypeForMem(SharedsPtrTy))}; 1424 CGF.EmitCallOrInvoke(TaskFunction, CallArgs); 1425 CGF.EmitStoreThroughLValue( 1426 RValue::get(CGF.Builder.getInt32(/*C=*/0)), 1427 CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 1428 CGF.FinishFunction(); 1429 return TaskEntry; 1430 } 1431 1432 void CGOpenMPRuntime::emitTaskCall( 1433 CodeGenFunction &CGF, SourceLocation Loc, bool Tied, 1434 llvm::PointerIntPair<llvm::Value *, 1, bool> Final, 1435 llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) { 1436 auto &C = CGM.getContext(); 1437 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1438 // Build type kmp_routine_entry_t (if not built yet). 1439 emitKmpRoutineEntryT(KmpInt32Ty); 1440 // Build particular struct kmp_task_t for the given task. 1441 auto KmpTaskQTy = 1442 createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy); 1443 QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy); 1444 auto KmpTaskTPtrTy = CGF.ConvertType(KmpTaskQTy)->getPointerTo(); 1445 auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy)); 1446 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 1447 1448 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 1449 // kmp_task_t *tt); 1450 auto *TaskEntry = emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy, 1451 SharedsPtrTy, TaskFunction); 1452 1453 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1454 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1455 // kmp_routine_entry_t *task_entry); 1456 // Task flags. Format is taken from 1457 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, 1458 // description of kmp_tasking_flags struct. 1459 const unsigned TiedFlag = 0x1; 1460 const unsigned FinalFlag = 0x2; 1461 unsigned Flags = Tied ? TiedFlag : 0; 1462 auto *TaskFlags = 1463 Final.getPointer() 1464 ? CGF.Builder.CreateSelect(Final.getPointer(), 1465 CGF.Builder.getInt32(FinalFlag), 1466 CGF.Builder.getInt32(/*C=*/0)) 1467 : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); 1468 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 1469 auto SharedsSize = C.getTypeSizeInChars(SharedsTy); 1470 llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), 1471 getThreadID(CGF, Loc), TaskFlags, KmpTaskTySize, 1472 CGM.getSize(SharedsSize), 1473 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1474 TaskEntry, KmpRoutineEntryPtrTy)}; 1475 auto *NewTask = CGF.EmitRuntimeCall( 1476 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 1477 auto *NewTaskNewTaskTTy = 1478 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NewTask, KmpTaskTPtrTy); 1479 // Fill the data in the resulting kmp_task_t record. 1480 // Copy shareds if there are any. 1481 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) 1482 CGF.EmitAggregateCopy( 1483 CGF.EmitLoadOfScalar( 1484 CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy, 1485 /*Idx=*/KmpTaskTShareds), 1486 /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc), 1487 Shareds, SharedsTy); 1488 // TODO: generate function with destructors for privates. 1489 // Provide pointer to function with destructors for privates. 1490 CGF.Builder.CreateAlignedStore( 1491 llvm::ConstantPointerNull::get( 1492 cast<llvm::PointerType>(KmpRoutineEntryPtrTy)), 1493 CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy, 1494 /*Idx=*/KmpTaskTDestructors), 1495 CGM.PointerAlignInBytes); 1496 1497 // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() 1498 // libcall. 1499 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1500 // *new_task); 1501 llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc), 1502 getThreadID(CGF, Loc), NewTask}; 1503 // TODO: add check for untied tasks. 1504 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1505 } 1506 1507 InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII( 1508 CodeGenFunction &CGF, const OMPExecutableDirective &D) 1509 : CGF(CGF) { 1510 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(D, CGF.CapturedStmtInfo); 1511 // 1.2.2 OpenMP Language Terminology 1512 // Structured block - An executable statement with a single entry at the 1513 // top and a single exit at the bottom. 1514 // The point of exit cannot be a branch out of the structured block. 1515 // longjmp() and throw() must not violate the entry/exit criteria. 1516 CGF.EHStack.pushTerminate(); 1517 } 1518 1519 InlinedOpenMPRegionRAII::~InlinedOpenMPRegionRAII() { 1520 CGF.EHStack.popTerminate(); 1521 auto *OldCSI = 1522 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 1523 delete CGF.CapturedStmtInfo; 1524 CGF.CapturedStmtInfo = OldCSI; 1525 } 1526 1527