1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CGOpenMPRuntime.h" 15 #include "CodeGenFunction.h" 16 #include "clang/AST/Decl.h" 17 #include "clang/AST/StmtOpenMP.h" 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/IR/CallSite.h" 20 #include "llvm/IR/DerivedTypes.h" 21 #include "llvm/IR/GlobalValue.h" 22 #include "llvm/IR/Value.h" 23 #include "llvm/Support/raw_ostream.h" 24 #include <cassert> 25 26 using namespace clang; 27 using namespace CodeGen; 28 29 namespace { 30 /// \brief API for captured statement code generation in OpenMP constructs. 31 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 32 public: 33 CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS, 34 const VarDecl *ThreadIDVar) 35 : CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar), 36 Directive(D) { 37 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 38 } 39 40 /// \brief Gets a variable or parameter for storing global thread id 41 /// inside OpenMP construct. 42 const VarDecl *getThreadIDVariable() const { return ThreadIDVar; } 43 44 /// \brief Gets an LValue for the current ThreadID variable. 45 LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 46 47 static bool classof(const CGCapturedStmtInfo *Info) { 48 return Info->getKind() == CR_OpenMP; 49 } 50 51 /// \brief Emit the captured statement body. 52 void EmitBody(CodeGenFunction &CGF, Stmt *S) override; 53 54 /// \brief Get the name of the capture helper. 55 StringRef getHelperName() const override { return ".omp_outlined."; } 56 57 private: 58 /// \brief A variable or parameter storing global thread id for OpenMP 59 /// constructs. 60 const VarDecl *ThreadIDVar; 61 /// \brief OpenMP executable directive associated with the region. 62 const OMPExecutableDirective &Directive; 63 }; 64 } // namespace 65 66 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 67 return CGF.MakeNaturalAlignAddrLValue( 68 CGF.GetAddrOfLocalVar(ThreadIDVar), 69 CGF.getContext().getPointerType(ThreadIDVar->getType())); 70 } 71 72 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, Stmt *S) { 73 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 74 CGF.EmitOMPPrivateClause(Directive, PrivateScope); 75 CGF.EmitOMPFirstprivateClause(Directive, PrivateScope); 76 if (PrivateScope.Privatize()) 77 // Emit implicit barrier to synchronize threads and avoid data races. 78 CGF.CGM.getOpenMPRuntime().EmitOMPBarrierCall(CGF, Directive.getLocStart(), 79 /*IsExplicit=*/false); 80 CGCapturedStmtInfo::EmitBody(CGF, S); 81 } 82 83 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) 84 : CGM(CGM), DefaultOpenMPPSource(nullptr) { 85 IdentTy = llvm::StructType::create( 86 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, 87 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, 88 CGM.Int8PtrTy /* psource */, nullptr); 89 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 90 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 91 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 92 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 93 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 94 } 95 96 llvm::Value * 97 CGOpenMPRuntime::EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D, 98 const VarDecl *ThreadIDVar) { 99 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); 100 CodeGenFunction CGF(CGM, true); 101 CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar); 102 CGF.CapturedStmtInfo = &CGInfo; 103 return CGF.GenerateCapturedStmtFunction(*CS); 104 } 105 106 llvm::Value * 107 CGOpenMPRuntime::GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags) { 108 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); 109 if (!Entry) { 110 if (!DefaultOpenMPPSource) { 111 // Initialize default location for psource field of ident_t structure of 112 // all ident_t objects. Format is ";file;function;line;column;;". 113 // Taken from 114 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c 115 DefaultOpenMPPSource = 116 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;"); 117 DefaultOpenMPPSource = 118 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 119 } 120 auto DefaultOpenMPLocation = new llvm::GlobalVariable( 121 CGM.getModule(), IdentTy, /*isConstant*/ true, 122 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); 123 DefaultOpenMPLocation->setUnnamedAddr(true); 124 125 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); 126 llvm::Constant *Values[] = {Zero, 127 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 128 Zero, Zero, DefaultOpenMPPSource}; 129 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); 130 DefaultOpenMPLocation->setInitializer(Init); 131 OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation; 132 return DefaultOpenMPLocation; 133 } 134 return Entry; 135 } 136 137 llvm::Value *CGOpenMPRuntime::EmitOpenMPUpdateLocation( 138 CodeGenFunction &CGF, SourceLocation Loc, OpenMPLocationFlags Flags) { 139 // If no debug info is generated - return global default location. 140 if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || 141 Loc.isInvalid()) 142 return GetOrCreateDefaultOpenMPLocation(Flags); 143 144 assert(CGF.CurFn && "No function in current CodeGenFunction."); 145 146 llvm::Value *LocValue = nullptr; 147 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 148 if (I != OpenMPLocThreadIDMap.end()) 149 LocValue = I->second.DebugLoc; 150 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 151 // GetOpenMPThreadID was called before this routine. 152 if (LocValue == nullptr) { 153 // Generate "ident_t .kmpc_loc.addr;" 154 llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr"); 155 AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy)); 156 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 157 Elem.second.DebugLoc = AI; 158 LocValue = AI; 159 160 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 161 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 162 CGF.Builder.CreateMemCpy(LocValue, GetOrCreateDefaultOpenMPLocation(Flags), 163 llvm::ConstantExpr::getSizeOf(IdentTy), 164 CGM.PointerAlignInBytes); 165 } 166 167 // char **psource = &.kmpc_loc_<flags>.addr.psource; 168 auto *PSource = 169 CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource); 170 171 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 172 if (OMPDebugLoc == nullptr) { 173 SmallString<128> Buffer2; 174 llvm::raw_svector_ostream OS2(Buffer2); 175 // Build debug location 176 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 177 OS2 << ";" << PLoc.getFilename() << ";"; 178 if (const FunctionDecl *FD = 179 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { 180 OS2 << FD->getQualifiedNameAsString(); 181 } 182 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 183 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 184 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 185 } 186 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 187 CGF.Builder.CreateStore(OMPDebugLoc, PSource); 188 189 return LocValue; 190 } 191 192 llvm::Value *CGOpenMPRuntime::GetOpenMPThreadID(CodeGenFunction &CGF, 193 SourceLocation Loc) { 194 assert(CGF.CurFn && "No function in current CodeGenFunction."); 195 196 llvm::Value *ThreadID = nullptr; 197 // Check whether we've already cached a load of the thread id in this 198 // function. 199 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 200 if (I != OpenMPLocThreadIDMap.end()) { 201 ThreadID = I->second.ThreadID; 202 if (ThreadID != nullptr) 203 return ThreadID; 204 } 205 if (auto OMPRegionInfo = 206 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 207 // Check if this an outlined function with thread id passed as argument. 208 auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable(); 209 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 210 auto RVal = CGF.EmitLoadOfLValue(LVal, Loc); 211 LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(), 212 ThreadIDVar->getType()); 213 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); 214 // If value loaded in entry block, cache it and use it everywhere in 215 // function. 216 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 217 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 218 Elem.second.ThreadID = ThreadID; 219 } 220 } else { 221 // This is not an outlined function region - need to call __kmpc_int32 222 // kmpc_global_thread_num(ident_t *loc). 223 // Generate thread id value and cache this value for use across the 224 // function. 225 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 226 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); 227 llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)}; 228 ThreadID = CGF.EmitRuntimeCall( 229 CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args); 230 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 231 Elem.second.ThreadID = ThreadID; 232 } 233 return ThreadID; 234 } 235 236 void CGOpenMPRuntime::FunctionFinished(CodeGenFunction &CGF) { 237 assert(CGF.CurFn && "No function in current CodeGenFunction."); 238 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) 239 OpenMPLocThreadIDMap.erase(CGF.CurFn); 240 } 241 242 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 243 return llvm::PointerType::getUnqual(IdentTy); 244 } 245 246 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 247 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 248 } 249 250 llvm::Constant * 251 CGOpenMPRuntime::CreateRuntimeFunction(OpenMPRTLFunction Function) { 252 llvm::Constant *RTLFn = nullptr; 253 switch (Function) { 254 case OMPRTL__kmpc_fork_call: { 255 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 256 // microtask, ...); 257 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 258 getKmpc_MicroPointerTy()}; 259 llvm::FunctionType *FnTy = 260 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 261 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 262 break; 263 } 264 case OMPRTL__kmpc_global_thread_num: { 265 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 266 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 267 llvm::FunctionType *FnTy = 268 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 269 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 270 break; 271 } 272 case OMPRTL__kmpc_threadprivate_cached: { 273 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 274 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 275 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 276 CGM.VoidPtrTy, CGM.SizeTy, 277 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 278 llvm::FunctionType *FnTy = 279 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 280 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 281 break; 282 } 283 case OMPRTL__kmpc_critical: { 284 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 285 // kmp_critical_name *crit); 286 llvm::Type *TypeParams[] = { 287 getIdentTyPointerTy(), CGM.Int32Ty, 288 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 289 llvm::FunctionType *FnTy = 290 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 291 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 292 break; 293 } 294 case OMPRTL__kmpc_threadprivate_register: { 295 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 296 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 297 // typedef void *(*kmpc_ctor)(void *); 298 auto KmpcCtorTy = 299 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 300 /*isVarArg*/ false)->getPointerTo(); 301 // typedef void *(*kmpc_cctor)(void *, void *); 302 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 303 auto KmpcCopyCtorTy = 304 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 305 /*isVarArg*/ false)->getPointerTo(); 306 // typedef void (*kmpc_dtor)(void *); 307 auto KmpcDtorTy = 308 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 309 ->getPointerTo(); 310 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 311 KmpcCopyCtorTy, KmpcDtorTy}; 312 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 313 /*isVarArg*/ false); 314 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 315 break; 316 } 317 case OMPRTL__kmpc_end_critical: { 318 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 319 // kmp_critical_name *crit); 320 llvm::Type *TypeParams[] = { 321 getIdentTyPointerTy(), CGM.Int32Ty, 322 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 323 llvm::FunctionType *FnTy = 324 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 325 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 326 break; 327 } 328 case OMPRTL__kmpc_cancel_barrier: { 329 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 330 // global_tid); 331 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 332 llvm::FunctionType *FnTy = 333 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 334 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 335 break; 336 } 337 // Build __kmpc_for_static_init*( 338 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 339 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 340 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 341 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 342 case OMPRTL__kmpc_for_static_init_4: { 343 auto ITy = CGM.Int32Ty; 344 auto PtrTy = llvm::PointerType::getUnqual(ITy); 345 llvm::Type *TypeParams[] = { 346 getIdentTyPointerTy(), // loc 347 CGM.Int32Ty, // tid 348 CGM.Int32Ty, // schedtype 349 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 350 PtrTy, // p_lower 351 PtrTy, // p_upper 352 PtrTy, // p_stride 353 ITy, // incr 354 ITy // chunk 355 }; 356 llvm::FunctionType *FnTy = 357 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 358 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4"); 359 break; 360 } 361 case OMPRTL__kmpc_for_static_init_4u: { 362 auto ITy = CGM.Int32Ty; 363 auto PtrTy = llvm::PointerType::getUnqual(ITy); 364 llvm::Type *TypeParams[] = { 365 getIdentTyPointerTy(), // loc 366 CGM.Int32Ty, // tid 367 CGM.Int32Ty, // schedtype 368 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 369 PtrTy, // p_lower 370 PtrTy, // p_upper 371 PtrTy, // p_stride 372 ITy, // incr 373 ITy // chunk 374 }; 375 llvm::FunctionType *FnTy = 376 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 377 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u"); 378 break; 379 } 380 case OMPRTL__kmpc_for_static_init_8: { 381 auto ITy = CGM.Int64Ty; 382 auto PtrTy = llvm::PointerType::getUnqual(ITy); 383 llvm::Type *TypeParams[] = { 384 getIdentTyPointerTy(), // loc 385 CGM.Int32Ty, // tid 386 CGM.Int32Ty, // schedtype 387 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 388 PtrTy, // p_lower 389 PtrTy, // p_upper 390 PtrTy, // p_stride 391 ITy, // incr 392 ITy // chunk 393 }; 394 llvm::FunctionType *FnTy = 395 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 396 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8"); 397 break; 398 } 399 case OMPRTL__kmpc_for_static_init_8u: { 400 auto ITy = CGM.Int64Ty; 401 auto PtrTy = llvm::PointerType::getUnqual(ITy); 402 llvm::Type *TypeParams[] = { 403 getIdentTyPointerTy(), // loc 404 CGM.Int32Ty, // tid 405 CGM.Int32Ty, // schedtype 406 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 407 PtrTy, // p_lower 408 PtrTy, // p_upper 409 PtrTy, // p_stride 410 ITy, // incr 411 ITy // chunk 412 }; 413 llvm::FunctionType *FnTy = 414 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 415 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u"); 416 break; 417 } 418 case OMPRTL__kmpc_for_static_fini: { 419 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 420 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 421 llvm::FunctionType *FnTy = 422 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 423 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 424 break; 425 } 426 case OMPRTL__kmpc_push_num_threads: { 427 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 428 // kmp_int32 num_threads) 429 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 430 CGM.Int32Ty}; 431 llvm::FunctionType *FnTy = 432 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 433 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 434 break; 435 } 436 case OMPRTL__kmpc_serialized_parallel: { 437 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 438 // global_tid); 439 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 440 llvm::FunctionType *FnTy = 441 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 442 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 443 break; 444 } 445 case OMPRTL__kmpc_end_serialized_parallel: { 446 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 447 // global_tid); 448 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 449 llvm::FunctionType *FnTy = 450 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 451 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 452 break; 453 } 454 case OMPRTL__kmpc_flush: { 455 // Build void __kmpc_flush(ident_t *loc); 456 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 457 llvm::FunctionType *FnTy = 458 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 459 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 460 break; 461 } 462 case OMPRTL__kmpc_master: { 463 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 464 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 465 llvm::FunctionType *FnTy = 466 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 467 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 468 break; 469 } 470 case OMPRTL__kmpc_end_master: { 471 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 472 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 473 llvm::FunctionType *FnTy = 474 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 475 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 476 break; 477 } 478 case OMPRTL__kmpc_omp_taskyield: { 479 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 480 // int end_part); 481 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 482 llvm::FunctionType *FnTy = 483 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 484 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 485 break; 486 } 487 case OMPRTL__kmpc_single: { 488 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 489 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 490 llvm::FunctionType *FnTy = 491 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 492 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 493 break; 494 } 495 case OMPRTL__kmpc_end_single: { 496 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 497 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 498 llvm::FunctionType *FnTy = 499 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 500 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 501 break; 502 } 503 } 504 return RTLFn; 505 } 506 507 llvm::Constant * 508 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 509 // Lookup the entry, lazily creating it if necessary. 510 return GetOrCreateInternalVariable(CGM.Int8PtrPtrTy, 511 Twine(CGM.getMangledName(VD)) + ".cache."); 512 } 513 514 llvm::Value *CGOpenMPRuntime::getOMPAddrOfThreadPrivate(CodeGenFunction &CGF, 515 const VarDecl *VD, 516 llvm::Value *VDAddr, 517 SourceLocation Loc) { 518 auto VarTy = VDAddr->getType()->getPointerElementType(); 519 llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc), 520 GetOpenMPThreadID(CGF, Loc), 521 CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy), 522 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 523 getOrCreateThreadPrivateCache(VD)}; 524 return CGF.EmitRuntimeCall( 525 CreateRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args); 526 } 527 528 void CGOpenMPRuntime::EmitOMPThreadPrivateVarInit( 529 CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor, 530 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 531 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 532 // library. 533 auto OMPLoc = EmitOpenMPUpdateLocation(CGF, Loc); 534 CGF.EmitRuntimeCall(CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), 535 OMPLoc); 536 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 537 // to register constructor/destructor for variable. 538 llvm::Value *Args[] = {OMPLoc, 539 CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy), 540 Ctor, CopyCtor, Dtor}; 541 CGF.EmitRuntimeCall( 542 CreateRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 543 } 544 545 llvm::Function *CGOpenMPRuntime::EmitOMPThreadPrivateVarDefinition( 546 const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc, 547 bool PerformInit, CodeGenFunction *CGF) { 548 VD = VD->getDefinition(CGM.getContext()); 549 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { 550 ThreadPrivateWithDefinition.insert(VD); 551 QualType ASTTy = VD->getType(); 552 553 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 554 auto Init = VD->getAnyInitializer(); 555 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 556 // Generate function that re-emits the declaration's initializer into the 557 // threadprivate copy of the variable VD 558 CodeGenFunction CtorCGF(CGM); 559 FunctionArgList Args; 560 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 561 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 562 Args.push_back(&Dst); 563 564 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 565 CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), 566 /*isVariadic=*/false); 567 auto FTy = CGM.getTypes().GetFunctionType(FI); 568 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 569 FTy, ".__kmpc_global_ctor_.", Loc); 570 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 571 Args, SourceLocation()); 572 auto ArgVal = CtorCGF.EmitLoadOfScalar( 573 CtorCGF.GetAddrOfLocalVar(&Dst), 574 /*Volatile=*/false, CGM.PointerAlignInBytes, 575 CGM.getContext().VoidPtrTy, Dst.getLocation()); 576 auto Arg = CtorCGF.Builder.CreatePointerCast( 577 ArgVal, 578 CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy))); 579 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 580 /*IsInitializer=*/true); 581 ArgVal = CtorCGF.EmitLoadOfScalar( 582 CtorCGF.GetAddrOfLocalVar(&Dst), 583 /*Volatile=*/false, CGM.PointerAlignInBytes, 584 CGM.getContext().VoidPtrTy, Dst.getLocation()); 585 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 586 CtorCGF.FinishFunction(); 587 Ctor = Fn; 588 } 589 if (VD->getType().isDestructedType() != QualType::DK_none) { 590 // Generate function that emits destructor call for the threadprivate copy 591 // of the variable VD 592 CodeGenFunction DtorCGF(CGM); 593 FunctionArgList Args; 594 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), 595 /*Id=*/nullptr, CGM.getContext().VoidPtrTy); 596 Args.push_back(&Dst); 597 598 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( 599 CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), 600 /*isVariadic=*/false); 601 auto FTy = CGM.getTypes().GetFunctionType(FI); 602 auto Fn = CGM.CreateGlobalInitOrDestructFunction( 603 FTy, ".__kmpc_global_dtor_.", Loc); 604 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 605 SourceLocation()); 606 auto ArgVal = DtorCGF.EmitLoadOfScalar( 607 DtorCGF.GetAddrOfLocalVar(&Dst), 608 /*Volatile=*/false, CGM.PointerAlignInBytes, 609 CGM.getContext().VoidPtrTy, Dst.getLocation()); 610 DtorCGF.emitDestroy(ArgVal, ASTTy, 611 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 612 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 613 DtorCGF.FinishFunction(); 614 Dtor = Fn; 615 } 616 // Do not emit init function if it is not required. 617 if (!Ctor && !Dtor) 618 return nullptr; 619 620 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 621 auto CopyCtorTy = 622 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 623 /*isVarArg=*/false)->getPointerTo(); 624 // Copying constructor for the threadprivate variable. 625 // Must be NULL - reserved by runtime, but currently it requires that this 626 // parameter is always NULL. Otherwise it fires assertion. 627 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 628 if (Ctor == nullptr) { 629 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 630 /*isVarArg=*/false)->getPointerTo(); 631 Ctor = llvm::Constant::getNullValue(CtorTy); 632 } 633 if (Dtor == nullptr) { 634 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 635 /*isVarArg=*/false)->getPointerTo(); 636 Dtor = llvm::Constant::getNullValue(DtorTy); 637 } 638 if (!CGF) { 639 auto InitFunctionTy = 640 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 641 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( 642 InitFunctionTy, ".__omp_threadprivate_init_."); 643 CodeGenFunction InitCGF(CGM); 644 FunctionArgList ArgList; 645 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 646 CGM.getTypes().arrangeNullaryFunction(), ArgList, 647 Loc); 648 EmitOMPThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 649 InitCGF.FinishFunction(); 650 return InitFunction; 651 } 652 EmitOMPThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 653 } 654 return nullptr; 655 } 656 657 void CGOpenMPRuntime::EmitOMPParallelCall(CodeGenFunction &CGF, 658 SourceLocation Loc, 659 llvm::Value *OutlinedFn, 660 llvm::Value *CapturedStruct) { 661 // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/) 662 llvm::Value *Args[] = { 663 EmitOpenMPUpdateLocation(CGF, Loc), 664 CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument 665 // (there is only one additional argument - 'context') 666 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()), 667 CGF.EmitCastToVoidPtr(CapturedStruct)}; 668 auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_fork_call); 669 CGF.EmitRuntimeCall(RTLFn, Args); 670 } 671 672 void CGOpenMPRuntime::EmitOMPSerialCall(CodeGenFunction &CGF, 673 SourceLocation Loc, 674 llvm::Value *OutlinedFn, 675 llvm::Value *CapturedStruct) { 676 auto ThreadID = GetOpenMPThreadID(CGF, Loc); 677 // Build calls: 678 // __kmpc_serialized_parallel(&Loc, GTid); 679 llvm::Value *SerArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc), ThreadID}; 680 auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_serialized_parallel); 681 CGF.EmitRuntimeCall(RTLFn, SerArgs); 682 683 // OutlinedFn(>id, &zero, CapturedStruct); 684 auto ThreadIDAddr = EmitThreadIDAddress(CGF, Loc); 685 auto Int32Ty = 686 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 687 auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr"); 688 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 689 llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct}; 690 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); 691 692 // __kmpc_end_serialized_parallel(&Loc, GTid); 693 llvm::Value *EndSerArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc), ThreadID}; 694 RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel); 695 CGF.EmitRuntimeCall(RTLFn, EndSerArgs); 696 } 697 698 // If we're inside an (outlined) parallel region, use the region info's 699 // thread-ID variable (it is passed in a first argument of the outlined function 700 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 701 // regular serial code region, get thread ID by calling kmp_int32 702 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 703 // return the address of that temp. 704 llvm::Value *CGOpenMPRuntime::EmitThreadIDAddress(CodeGenFunction &CGF, 705 SourceLocation Loc) { 706 if (auto OMPRegionInfo = 707 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 708 return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF), 709 SourceLocation()).getScalarVal(); 710 auto ThreadID = GetOpenMPThreadID(CGF, Loc); 711 auto Int32Ty = 712 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 713 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 714 CGF.EmitStoreOfScalar(ThreadID, 715 CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty)); 716 717 return ThreadIDTemp; 718 } 719 720 llvm::Constant * 721 CGOpenMPRuntime::GetOrCreateInternalVariable(llvm::Type *Ty, 722 const llvm::Twine &Name) { 723 SmallString<256> Buffer; 724 llvm::raw_svector_ostream Out(Buffer); 725 Out << Name; 726 auto RuntimeName = Out.str(); 727 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; 728 if (Elem.second) { 729 assert(Elem.second->getType()->getPointerElementType() == Ty && 730 "OMP internal variable has different type than requested"); 731 return &*Elem.second; 732 } 733 734 return Elem.second = new llvm::GlobalVariable( 735 CGM.getModule(), Ty, /*IsConstant*/ false, 736 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 737 Elem.first()); 738 } 739 740 llvm::Value *CGOpenMPRuntime::GetCriticalRegionLock(StringRef CriticalName) { 741 llvm::Twine Name(".gomp_critical_user_", CriticalName); 742 return GetOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); 743 } 744 745 void CGOpenMPRuntime::EmitOMPCriticalRegion( 746 CodeGenFunction &CGF, StringRef CriticalName, 747 const std::function<void()> &CriticalOpGen, SourceLocation Loc) { 748 auto RegionLock = GetCriticalRegionLock(CriticalName); 749 // __kmpc_critical(ident_t *, gtid, Lock); 750 // CriticalOpGen(); 751 // __kmpc_end_critical(ident_t *, gtid, Lock); 752 // Prepare arguments and build a call to __kmpc_critical 753 llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc), 754 GetOpenMPThreadID(CGF, Loc), RegionLock}; 755 auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_critical); 756 CGF.EmitRuntimeCall(RTLFn, Args); 757 CriticalOpGen(); 758 // Build a call to __kmpc_end_critical 759 RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_critical); 760 CGF.EmitRuntimeCall(RTLFn, Args); 761 } 762 763 static void EmitOMPIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, 764 const std::function<void()> &BodyOpGen) { 765 llvm::Value *CallBool = CGF.EmitScalarConversion( 766 IfCond, 767 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), 768 CGF.getContext().BoolTy); 769 770 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 771 auto *ContBlock = CGF.createBasicBlock("omp_if.end"); 772 // Generate the branch (If-stmt) 773 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 774 CGF.EmitBlock(ThenBlock); 775 BodyOpGen(); 776 // Emit the rest of bblocks/branches 777 CGF.EmitBranch(ContBlock); 778 CGF.EmitBlock(ContBlock, true); 779 } 780 781 void CGOpenMPRuntime::EmitOMPMasterRegion( 782 CodeGenFunction &CGF, const std::function<void()> &MasterOpGen, 783 SourceLocation Loc) { 784 // if(__kmpc_master(ident_t *, gtid)) { 785 // MasterOpGen(); 786 // __kmpc_end_master(ident_t *, gtid); 787 // } 788 // Prepare arguments and build a call to __kmpc_master 789 llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc), 790 GetOpenMPThreadID(CGF, Loc)}; 791 auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_master); 792 auto *IsMaster = CGF.EmitRuntimeCall(RTLFn, Args); 793 EmitOMPIfStmt(CGF, IsMaster, [&]() -> void { 794 MasterOpGen(); 795 // Build a call to __kmpc_end_master. 796 // OpenMP [1.2.2 OpenMP Language Terminology] 797 // For C/C++, an executable statement, possibly compound, with a single 798 // entry at the top and a single exit at the bottom, or an OpenMP construct. 799 // * Access to the structured block must not be the result of a branch. 800 // * The point of exit cannot be a branch out of the structured block. 801 // * The point of entry must not be a call to setjmp(). 802 // * longjmp() and throw() must not violate the entry/exit criteria. 803 // * An expression statement, iteration statement, selection statement, or 804 // try block is considered to be a structured block if the corresponding 805 // compound statement obtained by enclosing it in { and } would be a 806 // structured block. 807 // It is analyzed in Sema, so we can just call __kmpc_end_master() on 808 // fallthrough rather than pushing a normal cleanup for it. 809 RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_master); 810 CGF.EmitRuntimeCall(RTLFn, Args); 811 }); 812 } 813 814 void CGOpenMPRuntime::EmitOMPTaskyieldCall(CodeGenFunction &CGF, 815 SourceLocation Loc) { 816 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 817 llvm::Value *Args[] = { 818 EmitOpenMPUpdateLocation(CGF, Loc), GetOpenMPThreadID(CGF, Loc), 819 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 820 auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_omp_taskyield); 821 CGF.EmitRuntimeCall(RTLFn, Args); 822 } 823 824 void CGOpenMPRuntime::EmitOMPSingleRegion( 825 CodeGenFunction &CGF, const std::function<void()> &SingleOpGen, 826 SourceLocation Loc) { 827 // if(__kmpc_single(ident_t *, gtid)) { 828 // SingleOpGen(); 829 // __kmpc_end_single(ident_t *, gtid); 830 // } 831 // Prepare arguments and build a call to __kmpc_single 832 llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc), 833 GetOpenMPThreadID(CGF, Loc)}; 834 auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_single); 835 auto *IsSingle = CGF.EmitRuntimeCall(RTLFn, Args); 836 EmitOMPIfStmt(CGF, IsSingle, [&]() -> void { 837 SingleOpGen(); 838 // Build a call to __kmpc_end_single. 839 // OpenMP [1.2.2 OpenMP Language Terminology] 840 // For C/C++, an executable statement, possibly compound, with a single 841 // entry at the top and a single exit at the bottom, or an OpenMP construct. 842 // * Access to the structured block must not be the result of a branch. 843 // * The point of exit cannot be a branch out of the structured block. 844 // * The point of entry must not be a call to setjmp(). 845 // * longjmp() and throw() must not violate the entry/exit criteria. 846 // * An expression statement, iteration statement, selection statement, or 847 // try block is considered to be a structured block if the corresponding 848 // compound statement obtained by enclosing it in { and } would be a 849 // structured block. 850 // It is analyzed in Sema, so we can just call __kmpc_end_single() on 851 // fallthrough rather than pushing a normal cleanup for it. 852 RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_single); 853 CGF.EmitRuntimeCall(RTLFn, Args); 854 }); 855 } 856 857 void CGOpenMPRuntime::EmitOMPBarrierCall(CodeGenFunction &CGF, 858 SourceLocation Loc, bool IsExplicit) { 859 // Build call __kmpc_cancel_barrier(loc, thread_id); 860 auto Flags = static_cast<OpenMPLocationFlags>( 861 OMP_IDENT_KMPC | 862 (IsExplicit ? OMP_IDENT_BARRIER_EXPL : OMP_IDENT_BARRIER_IMPL)); 863 // Build call __kmpc_cancel_barrier(loc, thread_id); 864 // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this 865 // one provides the same functionality and adds initial support for 866 // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier() 867 // is provided default by the runtime library so it safe to make such 868 // replacement. 869 llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc, Flags), 870 GetOpenMPThreadID(CGF, Loc)}; 871 auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_cancel_barrier); 872 CGF.EmitRuntimeCall(RTLFn, Args); 873 } 874 875 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from 876 /// the enum sched_type in kmp.h). 877 enum OpenMPSchedType { 878 /// \brief Lower bound for default (unordered) versions. 879 OMP_sch_lower = 32, 880 OMP_sch_static_chunked = 33, 881 OMP_sch_static = 34, 882 OMP_sch_dynamic_chunked = 35, 883 OMP_sch_guided_chunked = 36, 884 OMP_sch_runtime = 37, 885 OMP_sch_auto = 38, 886 /// \brief Lower bound for 'ordered' versions. 887 OMP_ord_lower = 64, 888 /// \brief Lower bound for 'nomerge' versions. 889 OMP_nm_lower = 160, 890 }; 891 892 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 893 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 894 bool Chunked) { 895 switch (ScheduleKind) { 896 case OMPC_SCHEDULE_static: 897 return Chunked ? OMP_sch_static_chunked : OMP_sch_static; 898 case OMPC_SCHEDULE_dynamic: 899 return OMP_sch_dynamic_chunked; 900 case OMPC_SCHEDULE_guided: 901 return OMP_sch_guided_chunked; 902 case OMPC_SCHEDULE_auto: 903 return OMP_sch_auto; 904 case OMPC_SCHEDULE_runtime: 905 return OMP_sch_runtime; 906 case OMPC_SCHEDULE_unknown: 907 assert(!Chunked && "chunk was specified but schedule kind not known"); 908 return OMP_sch_static; 909 } 910 llvm_unreachable("Unexpected runtime schedule"); 911 } 912 913 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 914 bool Chunked) const { 915 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 916 return Schedule == OMP_sch_static; 917 } 918 919 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 920 auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false); 921 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 922 return Schedule != OMP_sch_static; 923 } 924 925 void CGOpenMPRuntime::EmitOMPForInit(CodeGenFunction &CGF, SourceLocation Loc, 926 OpenMPScheduleClauseKind ScheduleKind, 927 unsigned IVSize, bool IVSigned, 928 llvm::Value *IL, llvm::Value *LB, 929 llvm::Value *UB, llvm::Value *ST, 930 llvm::Value *Chunk) { 931 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr); 932 // Call __kmpc_for_static_init( 933 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 934 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 935 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 936 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 937 // TODO: Implement dynamic schedule. 938 939 // If the Chunk was not specified in the clause - use default value 1. 940 if (Chunk == nullptr) 941 Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1); 942 943 llvm::Value *Args[] = { 944 EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 945 GetOpenMPThreadID(CGF, Loc), 946 CGF.Builder.getInt32(Schedule), // Schedule type 947 IL, // &isLastIter 948 LB, // &LB 949 UB, // &UB 950 ST, // &Stride 951 CGF.Builder.getIntN(IVSize, 1), // Incr 952 Chunk // Chunk 953 }; 954 assert((IVSize == 32 || IVSize == 64) && 955 "Index size is not compatible with the omp runtime"); 956 auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4 957 : OMPRTL__kmpc_for_static_init_4u) 958 : (IVSigned ? OMPRTL__kmpc_for_static_init_8 959 : OMPRTL__kmpc_for_static_init_8u); 960 auto RTLFn = CreateRuntimeFunction(F); 961 CGF.EmitRuntimeCall(RTLFn, Args); 962 } 963 964 void CGOpenMPRuntime::EmitOMPForFinish(CodeGenFunction &CGF, SourceLocation Loc, 965 OpenMPScheduleClauseKind ScheduleKind) { 966 assert((ScheduleKind == OMPC_SCHEDULE_static || 967 ScheduleKind == OMPC_SCHEDULE_unknown) && 968 "Non-static schedule kinds are not yet implemented"); 969 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 970 llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 971 GetOpenMPThreadID(CGF, Loc)}; 972 auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_for_static_fini); 973 CGF.EmitRuntimeCall(RTLFn, Args); 974 } 975 976 void CGOpenMPRuntime::EmitOMPNumThreadsClause(CodeGenFunction &CGF, 977 llvm::Value *NumThreads, 978 SourceLocation Loc) { 979 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 980 llvm::Value *Args[] = { 981 EmitOpenMPUpdateLocation(CGF, Loc), GetOpenMPThreadID(CGF, Loc), 982 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 983 llvm::Constant *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_push_num_threads); 984 CGF.EmitRuntimeCall(RTLFn, Args); 985 } 986 987 void CGOpenMPRuntime::EmitOMPFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 988 SourceLocation Loc) { 989 // Build call void __kmpc_flush(ident_t *loc) 990 auto *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_flush); 991 CGF.EmitRuntimeCall(RTLFn, EmitOpenMPUpdateLocation(CGF, Loc)); 992 } 993