1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGOpenMPRuntime.h" 15 #include "CodeGenFunction.h" 16 #include "clang/AST/Decl.h" 17 #include "clang/AST/StmtOpenMP.h" 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/IR/CallSite.h" 20 #include "llvm/IR/DerivedTypes.h" 21 #include "llvm/IR/GlobalValue.h" 22 #include "llvm/IR/Value.h" 23 #include "llvm/Support/raw_ostream.h" 24 #include <cassert> 25 26 using namespace clang; 27 using namespace CodeGen; 28 29 namespace { 30 /// \brief Base class for handling code generation inside OpenMP regions. 31 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 32 public: 33 CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS) 34 : CGCapturedStmtInfo(CS, CR_OpenMP), Directive(D) {} 35 36 CGOpenMPRegionInfo(const OMPExecutableDirective &D) 37 : CGCapturedStmtInfo(CR_OpenMP), Directive(D) {} 38 39 /// \brief Get a variable or parameter for storing global thread id 40 /// inside OpenMP construct. 41 virtual const VarDecl *getThreadIDVariable() const = 0; 42 43 /// \brief Get an LValue for the current ThreadID variable. 44 LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 45 46 /// \brief Emit the captured statement body. 47 virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 48 49 static bool classof(const CGCapturedStmtInfo *Info) { 50 return Info->getKind() == CR_OpenMP; 51 } 52 protected: 53 /// \brief OpenMP executable directive associated with the region. 54 const OMPExecutableDirective &Directive; 55 }; 56 57 /// \brief API for captured statement code generation in OpenMP constructs. 58 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { 59 public: 60 CGOpenMPOutlinedRegionInfo(const OMPExecutableDirective &D, 61 const CapturedStmt &CS, const VarDecl *ThreadIDVar) 62 : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar) { 63 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 64 } 65 /// \brief Get a variable or parameter for storing global thread id 66 /// inside OpenMP construct. 67 virtual const VarDecl *getThreadIDVariable() const override { 68 return ThreadIDVar; 69 } 70 /// \brief Get the name of the capture helper. 71 StringRef getHelperName() const override { return ".omp_outlined."; } 72 73 private: 74 /// \brief A variable or parameter storing global thread id for OpenMP 75 /// constructs. 76 const VarDecl *ThreadIDVar; 77 }; 78 79 /// \brief API for inlined captured statement code generation in OpenMP 80 /// constructs. 81 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 82 public: 83 CGOpenMPInlinedRegionInfo(const OMPExecutableDirective &D, 84 CodeGenFunction::CGCapturedStmtInfo *OldCSI) 85 : CGOpenMPRegionInfo(D), OldCSI(OldCSI), 86 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 87 // \brief Retrieve the value of the context parameter. 88 virtual llvm::Value *getContextValue() const override { 89 if (OuterRegionInfo) 90 return OuterRegionInfo->getContextValue(); 91 llvm_unreachable("No context value for inlined OpenMP region"); 92 } 93 /// \brief Lookup the captured field decl for a variable. 94 virtual const FieldDecl *lookup(const VarDecl *VD) const override { 95 if (OuterRegionInfo) 96 return OuterRegionInfo->lookup(VD); 97 llvm_unreachable("Trying to reference VarDecl that is neither local nor " 98 "captured in outer OpenMP region"); 99 } 100 virtual FieldDecl *getThisFieldDecl() const override { 101 if (OuterRegionInfo) 102 return OuterRegionInfo->getThisFieldDecl(); 103 return nullptr; 104 } 105 /// \brief Get a variable or parameter for storing global thread id 106 /// inside OpenMP construct. 107 virtual const VarDecl *getThreadIDVariable() const override { 108 if (OuterRegionInfo) 109 return OuterRegionInfo->getThreadIDVariable(); 110 return nullptr; 111 } 112 /// \brief Get the name of the capture helper. 113 virtual StringRef getHelperName() const override { 114 llvm_unreachable("No helper name for inlined OpenMP construct"); 115 } 116 117 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 118 119 private: 120 /// \brief CodeGen info about outer OpenMP region. 121 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 122 CGOpenMPRegionInfo *OuterRegionInfo; 123 }; 124 } // namespace 125 126 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 127 return CGF.MakeNaturalAlignAddrLValue( 128 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 129 CGF.getContext().getPointerType(getThreadIDVariable()->getType())); 130 } 131 132 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { 133 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 134 CGF.EmitOMPPrivateClause(Directive, PrivateScope); 135 CGF.EmitOMPFirstprivateClause(Directive, PrivateScope); 136 if (PrivateScope.Privatize()) 137 // Emit implicit barrier to synchronize threads and avoid data races. 138 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Directive.getLocStart(), 139 /*IsExplicit=*/false); 140 CGCapturedStmtInfo::EmitBody(CGF, S); 141 } 142 143 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 144 : CGM(CGM), DefaultOpenMPPSource(nullptr) { 145 IdentTy = llvm::StructType::create( 146 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 147 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 148 CGM.Int8PtrTy /* psource */, nullptr); 149 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 150 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 151 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 152 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 153 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 154 } 155 156 llvm::Value * 157 CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D, 158 const VarDecl *ThreadIDVar) { 159 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 160 CodeGenFunction CGF(CGM, true); 161 CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar); 162 CGF.CapturedStmtInfo = &CGInfo; 163 return CGF.GenerateCapturedStmtFunction(*CS); 164 } 165 166 llvm::Value * 167 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { 168 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 169 if (!Entry) { 170 if (!DefaultOpenMPPSource) { 171 // Initialize default location for psource field of ident_t structure of 172 // all ident_t objects. Format is ";file;function;line;column;;". 173 // Taken from 174 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 175 DefaultOpenMPPSource = 176 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;"); 177 DefaultOpenMPPSource = 178 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 179 } 180 auto DefaultOpenMPLocation = new llvm::GlobalVariable( 181 CGM.getModule(), IdentTy, /*isConstant*/ true, 182 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); 183 DefaultOpenMPLocation->setUnnamedAddr(true); 184 185 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); 186 llvm::Constant *Values[] = {Zero, 187 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 188 Zero, Zero, DefaultOpenMPPSource}; 189 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); 190 DefaultOpenMPLocation->setInitializer(Init); 191 OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation; 192 return DefaultOpenMPLocation; 193 } 194 return Entry; 195 } 196 197 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 198 SourceLocation Loc, 199 OpenMPLocationFlags Flags) { 200 // If no debug info is generated - return global default location. 201 if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || 202 Loc.isInvalid()) 203 return getOrCreateDefaultLocation(Flags); 204 205 assert(CGF.CurFn && "No function in current CodeGenFunction."); 206 207 llvm::Value *LocValue = nullptr; 208 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 209 if (I != OpenMPLocThreadIDMap.end()) 210 LocValue = I->second.DebugLoc; 211 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 212 // GetOpenMPThreadID was called before this routine. 213 if (LocValue == nullptr) { 214 // Generate "ident_t .kmpc_loc.addr;" 215 llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr"); 216 AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy)); 217 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 218 Elem.second.DebugLoc = AI; 219 LocValue = AI; 220 221 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 222 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 223 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 224 llvm::ConstantExpr::getSizeOf(IdentTy), 225 CGM.PointerAlignInBytes); 226 } 227 228 // char **psource = &.kmpc_loc_<flags>.addr.psource; 229 auto *PSource = 230 CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource); 231 232 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 233 if (OMPDebugLoc == nullptr) { 234 SmallString<128> Buffer2; 235 llvm::raw_svector_ostream OS2(Buffer2); 236 // Build debug location 237 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 238 OS2 << ";" << PLoc.getFilename() << ";"; 239 if (const FunctionDecl *FD = 240 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 241 OS2 << FD->getQualifiedNameAsString(); 242 } 243 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 244 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 245 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 246 } 247 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 248 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 249 250 return LocValue; 251 } 252 253 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 254 SourceLocation Loc) { 255 assert(CGF.CurFn && "No function in current CodeGenFunction."); 256 257 llvm::Value *ThreadID = nullptr; 258 // Check whether we've already cached a load of the thread id in this 259 // function. 260 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 261 if (I != OpenMPLocThreadIDMap.end()) { 262 ThreadID = I->second.ThreadID; 263 if (ThreadID != nullptr) 264 return ThreadID; 265 } 266 if (auto OMPRegionInfo = 267 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 268 if (auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable()) { 269 // Check if this an outlined function with thread id passed as argument. 270 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 271 auto RVal = CGF.EmitLoadOfLValue(LVal, Loc); 272 LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(), 273 ThreadIDVar->getType()); 274 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 275 // If value loaded in entry block, cache it and use it everywhere in 276 // function. 277 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 278 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 279 Elem.second.ThreadID = ThreadID; 280 } 281 return ThreadID; 282 } 283 } 284 285 // This is not an outlined function region - need to call __kmpc_int32 286 // kmpc_global_thread_num(ident_t *loc). 287 // Generate thread id value and cache this value for use across the 288 // function. 289 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 290 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 291 ThreadID = 292 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 293 emitUpdateLocation(CGF, Loc)); 294 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 295 Elem.second.ThreadID = ThreadID; 296 return ThreadID; 297 } 298 299 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 300 assert(CGF.CurFn && "No function in current CodeGenFunction."); 301 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 302 OpenMPLocThreadIDMap.erase(CGF.CurFn); 303 } 304 305 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 306 return llvm::PointerType::getUnqual(IdentTy); 307 } 308 309 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 310 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 311 } 312 313 llvm::Constant * 314 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { 315 llvm::Constant *RTLFn = nullptr; 316 switch (Function) { 317 case OMPRTL__kmpc_fork_call: { 318 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 319 // microtask, ...); 320 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 321 getKmpc_MicroPointerTy()}; 322 llvm::FunctionType *FnTy = 323 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 324 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 325 break; 326 } 327 case OMPRTL__kmpc_global_thread_num: { 328 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 329 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 330 llvm::FunctionType *FnTy = 331 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 332 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 333 break; 334 } 335 case OMPRTL__kmpc_threadprivate_cached: { 336 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 337 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 338 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 339 CGM.VoidPtrTy, CGM.SizeTy, 340 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 341 llvm::FunctionType *FnTy = 342 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 343 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 344 break; 345 } 346 case OMPRTL__kmpc_critical: { 347 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 348 // kmp_critical_name *crit); 349 llvm::Type *TypeParams[] = { 350 getIdentTyPointerTy(), CGM.Int32Ty, 351 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 352 llvm::FunctionType *FnTy = 353 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 354 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 355 break; 356 } 357 case OMPRTL__kmpc_threadprivate_register: { 358 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 359 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 360 // typedef void *(*kmpc_ctor)(void *); 361 auto KmpcCtorTy = 362 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 363 /*isVarArg*/ false)->getPointerTo(); 364 // typedef void *(*kmpc_cctor)(void *, void *); 365 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 366 auto KmpcCopyCtorTy = 367 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 368 /*isVarArg*/ false)->getPointerTo(); 369 // typedef void (*kmpc_dtor)(void *); 370 auto KmpcDtorTy = 371 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 372 ->getPointerTo(); 373 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 374 KmpcCopyCtorTy, KmpcDtorTy}; 375 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 376 /*isVarArg*/ false); 377 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 378 break; 379 } 380 case OMPRTL__kmpc_end_critical: { 381 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 382 // kmp_critical_name *crit); 383 llvm::Type *TypeParams[] = { 384 getIdentTyPointerTy(), CGM.Int32Ty, 385 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 386 llvm::FunctionType *FnTy = 387 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 388 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 389 break; 390 } 391 case OMPRTL__kmpc_cancel_barrier: { 392 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 393 // global_tid); 394 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 395 llvm::FunctionType *FnTy = 396 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 397 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 398 break; 399 } 400 // Build __kmpc_for_static_init*( 401 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 402 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 403 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 404 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 405 case OMPRTL__kmpc_for_static_init_4: { 406 auto ITy = CGM.Int32Ty; 407 auto PtrTy = llvm::PointerType::getUnqual(ITy); 408 llvm::Type *TypeParams[] = { 409 getIdentTyPointerTy(), // loc 410 CGM.Int32Ty, // tid 411 CGM.Int32Ty, // schedtype 412 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 413 PtrTy, // p_lower 414 PtrTy, // p_upper 415 PtrTy, // p_stride 416 ITy, // incr 417 ITy // chunk 418 }; 419 llvm::FunctionType *FnTy = 420 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 421 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4"); 422 break; 423 } 424 case OMPRTL__kmpc_for_static_init_4u: { 425 auto ITy = CGM.Int32Ty; 426 auto PtrTy = llvm::PointerType::getUnqual(ITy); 427 llvm::Type *TypeParams[] = { 428 getIdentTyPointerTy(), // loc 429 CGM.Int32Ty, // tid 430 CGM.Int32Ty, // schedtype 431 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 432 PtrTy, // p_lower 433 PtrTy, // p_upper 434 PtrTy, // p_stride 435 ITy, // incr 436 ITy // chunk 437 }; 438 llvm::FunctionType *FnTy = 439 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 440 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u"); 441 break; 442 } 443 case OMPRTL__kmpc_for_static_init_8: { 444 auto ITy = CGM.Int64Ty; 445 auto PtrTy = llvm::PointerType::getUnqual(ITy); 446 llvm::Type *TypeParams[] = { 447 getIdentTyPointerTy(), // loc 448 CGM.Int32Ty, // tid 449 CGM.Int32Ty, // schedtype 450 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 451 PtrTy, // p_lower 452 PtrTy, // p_upper 453 PtrTy, // p_stride 454 ITy, // incr 455 ITy // chunk 456 }; 457 llvm::FunctionType *FnTy = 458 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 459 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8"); 460 break; 461 } 462 case OMPRTL__kmpc_for_static_init_8u: { 463 auto ITy = CGM.Int64Ty; 464 auto PtrTy = llvm::PointerType::getUnqual(ITy); 465 llvm::Type *TypeParams[] = { 466 getIdentTyPointerTy(), // loc 467 CGM.Int32Ty, // tid 468 CGM.Int32Ty, // schedtype 469 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 470 PtrTy, // p_lower 471 PtrTy, // p_upper 472 PtrTy, // p_stride 473 ITy, // incr 474 ITy // chunk 475 }; 476 llvm::FunctionType *FnTy = 477 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 478 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u"); 479 break; 480 } 481 case OMPRTL__kmpc_for_static_fini: { 482 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 483 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 484 llvm::FunctionType *FnTy = 485 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 486 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 487 break; 488 } 489 case OMPRTL__kmpc_push_num_threads: { 490 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 491 // kmp_int32 num_threads) 492 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 493 CGM.Int32Ty}; 494 llvm::FunctionType *FnTy = 495 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 496 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 497 break; 498 } 499 case OMPRTL__kmpc_serialized_parallel: { 500 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 501 // global_tid); 502 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 503 llvm::FunctionType *FnTy = 504 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 505 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 506 break; 507 } 508 case OMPRTL__kmpc_end_serialized_parallel: { 509 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 510 // global_tid); 511 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 512 llvm::FunctionType *FnTy = 513 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 514 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 515 break; 516 } 517 case OMPRTL__kmpc_flush: { 518 // Build void __kmpc_flush(ident_t *loc); 519 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 520 llvm::FunctionType *FnTy = 521 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 522 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 523 break; 524 } 525 case OMPRTL__kmpc_master: { 526 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 527 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 528 llvm::FunctionType *FnTy = 529 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 530 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 531 break; 532 } 533 case OMPRTL__kmpc_end_master: { 534 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 535 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 536 llvm::FunctionType *FnTy = 537 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 538 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 539 break; 540 } 541 case OMPRTL__kmpc_omp_taskyield: { 542 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 543 // int end_part); 544 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 545 llvm::FunctionType *FnTy = 546 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 547 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 548 break; 549 } 550 case OMPRTL__kmpc_single: { 551 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 552 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 553 llvm::FunctionType *FnTy = 554 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 555 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 556 break; 557 } 558 case OMPRTL__kmpc_end_single: { 559 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 560 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 561 llvm::FunctionType *FnTy = 562 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 563 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 564 break; 565 } 566 } 567 return RTLFn; 568 } 569 570 llvm::Constant * 571 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 572 // Lookup the entry, lazily creating it if necessary. 573 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, 574 Twine(CGM.getMangledName(VD)) + ".cache."); 575 } 576 577 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 578 const VarDecl *VD, 579 llvm::Value *VDAddr, 580 SourceLocation Loc) { 581 auto VarTy = VDAddr->getType()->getPointerElementType(); 582 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 583 CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy), 584 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 585 getOrCreateThreadPrivateCache(VD)}; 586 return CGF.EmitRuntimeCall( 587 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args); 588 } 589 590 void CGOpenMPRuntime::emitThreadPrivateVarInit( 591 CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor, 592 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 593 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 594 // library. 595 auto OMPLoc = emitUpdateLocation(CGF, Loc); 596 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 597 OMPLoc); 598 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 599 // to register constructor/destructor for variable. 600 llvm::Value *Args[] = {OMPLoc, 601 CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy), 602 Ctor, CopyCtor, Dtor}; 603 CGF.EmitRuntimeCall( 604 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 605 } 606 607 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 608 const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc, 609 bool PerformInit, CodeGenFunction *CGF) { 610 VD = VD->getDefinition(CGM.getContext()); 611 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 612 ThreadPrivateWithDefinition.insert(VD); 613 QualType ASTTy = VD->getType(); 614 615 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 616 auto Init = VD->getAnyInitializer(); 617 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 618 // Generate function that re-emits the declaration's initializer into the 619 // threadprivate copy of the variable VD 620 CodeGenFunction CtorCGF(CGM); 621 FunctionArgList Args; 622 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 623 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 624 Args.push_back(&Dst); 625 626 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 627 CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), 628 /*isVariadic=*/false); 629 auto FTy = CGM.getTypes().GetFunctionType(FI); 630 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 631 FTy, ".__kmpc_global_ctor_.", Loc); 632 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 633 Args, SourceLocation()); 634 auto ArgVal = CtorCGF.EmitLoadOfScalar( 635 CtorCGF.GetAddrOfLocalVar(&Dst), 636 /*Volatile=*/false, CGM.PointerAlignInBytes, 637 CGM.getContext().VoidPtrTy, Dst.getLocation()); 638 auto Arg = CtorCGF.Builder.CreatePointerCast( 639 ArgVal, 640 CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy))); 641 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 642 /*IsInitializer=*/true); 643 ArgVal = CtorCGF.EmitLoadOfScalar( 644 CtorCGF.GetAddrOfLocalVar(&Dst), 645 /*Volatile=*/false, CGM.PointerAlignInBytes, 646 CGM.getContext().VoidPtrTy, Dst.getLocation()); 647 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 648 CtorCGF.FinishFunction(); 649 Ctor = Fn; 650 } 651 if (VD->getType().isDestructedType() != QualType::DK_none) { 652 // Generate function that emits destructor call for the threadprivate copy 653 // of the variable VD 654 CodeGenFunction DtorCGF(CGM); 655 FunctionArgList Args; 656 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 657 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 658 Args.push_back(&Dst); 659 660 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 661 CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), 662 /*isVariadic=*/false); 663 auto FTy = CGM.getTypes().GetFunctionType(FI); 664 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 665 FTy, ".__kmpc_global_dtor_.", Loc); 666 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 667 SourceLocation()); 668 auto ArgVal = DtorCGF.EmitLoadOfScalar( 669 DtorCGF.GetAddrOfLocalVar(&Dst), 670 /*Volatile=*/false, CGM.PointerAlignInBytes, 671 CGM.getContext().VoidPtrTy, Dst.getLocation()); 672 DtorCGF.emitDestroy(ArgVal, ASTTy, 673 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 674 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 675 DtorCGF.FinishFunction(); 676 Dtor = Fn; 677 } 678 // Do not emit init function if it is not required. 679 if (!Ctor && !Dtor) 680 return nullptr; 681 682 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 683 auto CopyCtorTy = 684 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 685 /*isVarArg=*/false)->getPointerTo(); 686 // Copying constructor for the threadprivate variable. 687 // Must be NULL - reserved by runtime, but currently it requires that this 688 // parameter is always NULL. Otherwise it fires assertion. 689 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 690 if (Ctor == nullptr) { 691 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 692 /*isVarArg=*/false)->getPointerTo(); 693 Ctor = llvm::Constant::getNullValue(CtorTy); 694 } 695 if (Dtor == nullptr) { 696 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 697 /*isVarArg=*/false)->getPointerTo(); 698 Dtor = llvm::Constant::getNullValue(DtorTy); 699 } 700 if (!CGF) { 701 auto InitFunctionTy = 702 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 703 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 704 InitFunctionTy, ".__omp_threadprivate_init_."); 705 CodeGenFunction InitCGF(CGM); 706 FunctionArgList ArgList; 707 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 708 CGM.getTypes().arrangeNullaryFunction(), ArgList, 709 Loc); 710 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 711 InitCGF.FinishFunction(); 712 return InitFunction; 713 } 714 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 715 } 716 return nullptr; 717 } 718 719 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 720 llvm::Value *OutlinedFn, 721 llvm::Value *CapturedStruct) { 722 // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/) 723 llvm::Value *Args[] = { 724 emitUpdateLocation(CGF, Loc), 725 CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument 726 // (there is only one additional argument - 'context') 727 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()), 728 CGF.EmitCastToVoidPtr(CapturedStruct)}; 729 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); 730 CGF.EmitRuntimeCall(RTLFn, Args); 731 } 732 733 void CGOpenMPRuntime::emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc, 734 llvm::Value *OutlinedFn, 735 llvm::Value *CapturedStruct) { 736 auto ThreadID = getThreadID(CGF, Loc); 737 // Build calls: 738 // __kmpc_serialized_parallel(&Loc, GTid); 739 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), ThreadID}; 740 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), 741 Args); 742 743 // OutlinedFn(>id, &zero, CapturedStruct); 744 auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); 745 auto Int32Ty = 746 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 747 auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr"); 748 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 749 llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct}; 750 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 751 752 // __kmpc_end_serialized_parallel(&Loc, GTid); 753 llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID}; 754 CGF.EmitRuntimeCall( 755 createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); 756 } 757 758 // If we're inside an (outlined) parallel region, use the region info's 759 // thread-ID variable (it is passed in a first argument of the outlined function 760 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 761 // regular serial code region, get thread ID by calling kmp_int32 762 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 763 // return the address of that temp. 764 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 765 SourceLocation Loc) { 766 if (auto OMPRegionInfo = 767 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 768 if (OMPRegionInfo->getThreadIDVariable()) 769 return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF), 770 Loc).getScalarVal(); 771 772 auto ThreadID = getThreadID(CGF, Loc); 773 auto Int32Ty = 774 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 775 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 776 CGF.EmitStoreOfScalar(ThreadID, 777 CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty)); 778 779 return ThreadIDTemp; 780 } 781 782 llvm::Constant * 783 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, 784 const llvm::Twine &Name) { 785 SmallString<256> Buffer; 786 llvm::raw_svector_ostream Out(Buffer); 787 Out << Name; 788 auto RuntimeName = Out.str(); 789 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 790 if (Elem.second) { 791 assert(Elem.second->getType()->getPointerElementType() == Ty && 792 "OMP internal variable has different type than requested"); 793 return &*Elem.second; 794 } 795 796 return Elem.second = new llvm::GlobalVariable( 797 CGM.getModule(), Ty, /*IsConstant*/ false, 798 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 799 Elem.first()); 800 } 801 802 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 803 llvm::Twine Name(".gomp_critical_user_", CriticalName); 804 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 805 } 806 807 void CGOpenMPRuntime::emitCriticalRegion( 808 CodeGenFunction &CGF, StringRef CriticalName, 809 const std::function<void()> &CriticalOpGen, SourceLocation Loc) { 810 auto RegionLock = getCriticalRegionLock(CriticalName); 811 // __kmpc_critical(ident_t *, gtid, Lock); 812 // CriticalOpGen(); 813 // __kmpc_end_critical(ident_t *, gtid, Lock); 814 // Prepare arguments and build a call to __kmpc_critical 815 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 816 RegionLock}; 817 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); 818 CriticalOpGen(); 819 // Build a call to __kmpc_end_critical 820 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 821 } 822 823 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, 824 const std::function<void()> &BodyOpGen) { 825 llvm::Value *CallBool = CGF.EmitScalarConversion( 826 IfCond, 827 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), 828 CGF.getContext().BoolTy); 829 830 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 831 auto *ContBlock = CGF.createBasicBlock("omp_if.end"); 832 // Generate the branch (If-stmt) 833 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 834 CGF.EmitBlock(ThenBlock); 835 BodyOpGen(); 836 // Emit the rest of bblocks/branches 837 CGF.EmitBranch(ContBlock); 838 CGF.EmitBlock(ContBlock, true); 839 } 840 841 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 842 const std::function<void()> &MasterOpGen, 843 SourceLocation Loc) { 844 // if(__kmpc_master(ident_t *, gtid)) { 845 // MasterOpGen(); 846 // __kmpc_end_master(ident_t *, gtid); 847 // } 848 // Prepare arguments and build a call to __kmpc_master 849 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 850 auto *IsMaster = 851 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); 852 emitIfStmt(CGF, IsMaster, [&]() -> void { 853 MasterOpGen(); 854 // Build a call to __kmpc_end_master. 855 // OpenMP [1.2.2 OpenMP Language Terminology] 856 // For C/C++, an executable statement, possibly compound, with a single 857 // entry at the top and a single exit at the bottom, or an OpenMP construct. 858 // * Access to the structured block must not be the result of a branch. 859 // * The point of exit cannot be a branch out of the structured block. 860 // * The point of entry must not be a call to setjmp(). 861 // * longjmp() and throw() must not violate the entry/exit criteria. 862 // * An expression statement, iteration statement, selection statement, or 863 // try block is considered to be a structured block if the corresponding 864 // compound statement obtained by enclosing it in { and } would be a 865 // structured block. 866 // It is analyzed in Sema, so we can just call __kmpc_end_master() on 867 // fallthrough rather than pushing a normal cleanup for it. 868 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_master), Args); 869 }); 870 } 871 872 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 873 SourceLocation Loc) { 874 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 875 llvm::Value *Args[] = { 876 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 877 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 878 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 879 } 880 881 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 882 const std::function<void()> &SingleOpGen, 883 SourceLocation Loc) { 884 // if(__kmpc_single(ident_t *, gtid)) { 885 // SingleOpGen(); 886 // __kmpc_end_single(ident_t *, gtid); 887 // } 888 // Prepare arguments and build a call to __kmpc_single 889 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 890 auto *IsSingle = 891 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); 892 emitIfStmt(CGF, IsSingle, [&]() -> void { 893 SingleOpGen(); 894 // Build a call to __kmpc_end_single. 895 // OpenMP [1.2.2 OpenMP Language Terminology] 896 // For C/C++, an executable statement, possibly compound, with a single 897 // entry at the top and a single exit at the bottom, or an OpenMP construct. 898 // * Access to the structured block must not be the result of a branch. 899 // * The point of exit cannot be a branch out of the structured block. 900 // * The point of entry must not be a call to setjmp(). 901 // * longjmp() and throw() must not violate the entry/exit criteria. 902 // * An expression statement, iteration statement, selection statement, or 903 // try block is considered to be a structured block if the corresponding 904 // compound statement obtained by enclosing it in { and } would be a 905 // structured block. 906 // It is analyzed in Sema, so we can just call __kmpc_end_single() on 907 // fallthrough rather than pushing a normal cleanup for it. 908 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), Args); 909 }); 910 } 911 912 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 913 bool IsExplicit) { 914 // Build call __kmpc_cancel_barrier(loc, thread_id); 915 auto Flags = static_cast<OpenMPLocationFlags>( 916 OMP_IDENT_KMPC | 917 (IsExplicit ? OMP_IDENT_BARRIER_EXPL : OMP_IDENT_BARRIER_IMPL)); 918 // Build call __kmpc_cancel_barrier(loc, thread_id); 919 // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this 920 // one provides the same functionality and adds initial support for 921 // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier() 922 // is provided default by the runtime library so it safe to make such 923 // replacement. 924 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 925 getThreadID(CGF, Loc)}; 926 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 927 } 928 929 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 930 /// the enum sched_type in kmp.h). 931 enum OpenMPSchedType { 932 /// \brief Lower bound for default (unordered) versions. 933 OMP_sch_lower = 32, 934 OMP_sch_static_chunked = 33, 935 OMP_sch_static = 34, 936 OMP_sch_dynamic_chunked = 35, 937 OMP_sch_guided_chunked = 36, 938 OMP_sch_runtime = 37, 939 OMP_sch_auto = 38, 940 /// \brief Lower bound for 'ordered' versions. 941 OMP_ord_lower = 64, 942 /// \brief Lower bound for 'nomerge' versions. 943 OMP_nm_lower = 160, 944 }; 945 946 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 947 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 948 bool Chunked) { 949 switch (ScheduleKind) { 950 case OMPC_SCHEDULE_static: 951 return Chunked ? OMP_sch_static_chunked : OMP_sch_static; 952 case OMPC_SCHEDULE_dynamic: 953 return OMP_sch_dynamic_chunked; 954 case OMPC_SCHEDULE_guided: 955 return OMP_sch_guided_chunked; 956 case OMPC_SCHEDULE_auto: 957 return OMP_sch_auto; 958 case OMPC_SCHEDULE_runtime: 959 return OMP_sch_runtime; 960 case OMPC_SCHEDULE_unknown: 961 assert(!Chunked && "chunk was specified but schedule kind not known"); 962 return OMP_sch_static; 963 } 964 llvm_unreachable("Unexpected runtime schedule"); 965 } 966 967 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 968 bool Chunked) const { 969 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 970 return Schedule == OMP_sch_static; 971 } 972 973 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 974 auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false); 975 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 976 return Schedule != OMP_sch_static; 977 } 978 979 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc, 980 OpenMPScheduleClauseKind ScheduleKind, 981 unsigned IVSize, bool IVSigned, 982 llvm::Value *IL, llvm::Value *LB, 983 llvm::Value *UB, llvm::Value *ST, 984 llvm::Value *Chunk) { 985 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr); 986 // Call __kmpc_for_static_init( 987 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 988 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 989 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 990 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 991 // TODO: Implement dynamic schedule. 992 993 // If the Chunk was not specified in the clause - use default value 1. 994 if (Chunk == nullptr) 995 Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1); 996 997 llvm::Value *Args[] = { 998 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc), 999 CGF.Builder.getInt32(Schedule), // Schedule type 1000 IL, // &isLastIter 1001 LB, // &LB 1002 UB, // &UB 1003 ST, // &Stride 1004 CGF.Builder.getIntN(IVSize, 1), // Incr 1005 Chunk // Chunk 1006 }; 1007 assert((IVSize == 32 || IVSize == 64) && 1008 "Index size is not compatible with the omp runtime"); 1009 auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4 1010 : OMPRTL__kmpc_for_static_init_4u) 1011 : (IVSigned ? OMPRTL__kmpc_for_static_init_8 1012 : OMPRTL__kmpc_for_static_init_8u); 1013 CGF.EmitRuntimeCall(createRuntimeFunction(F), Args); 1014 } 1015 1016 void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc, 1017 OpenMPScheduleClauseKind ScheduleKind) { 1018 assert((ScheduleKind == OMPC_SCHEDULE_static || 1019 ScheduleKind == OMPC_SCHEDULE_unknown) && 1020 "Non-static schedule kinds are not yet implemented"); 1021 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 1022 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 1023 getThreadID(CGF, Loc)}; 1024 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 1025 Args); 1026 } 1027 1028 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 1029 llvm::Value *NumThreads, 1030 SourceLocation Loc) { 1031 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 1032 llvm::Value *Args[] = { 1033 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1034 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 1035 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 1036 Args); 1037 } 1038 1039 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 1040 SourceLocation Loc) { 1041 // Build call void __kmpc_flush(ident_t *loc) 1042 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 1043 emitUpdateLocation(CGF, Loc)); 1044 } 1045 1046 InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII( 1047 CodeGenFunction &CGF, const OMPExecutableDirective &D) 1048 : CGF(CGF) { 1049 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(D, CGF.CapturedStmtInfo); 1050 } 1051 1052 InlinedOpenMPRegionRAII::~InlinedOpenMPRegionRAII() { 1053 auto *OldCSI = 1054 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 1055 delete CGF.CapturedStmtInfo; 1056 CGF.CapturedStmtInfo = OldCSI; 1057 } 1058 1059