//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// \brief API for captured statement code generation in OpenMP constructs.
///
/// Remembers the directive being emitted and the variable that holds the
/// global thread id, so that the body emission and getThreadID() can use them.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// \brief Creates region info for directive \a D with captured statement
  /// \a CS. \a ThreadIDVar must not be null (asserted).
  CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS,
                     const VarDecl *ThreadIDVar)
      : CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar),
        Directive(D) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// \brief Gets a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const { return ThreadIDVar; }

  /// \brief Gets an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// \brief Support for isa<>/cast<>/dyn_cast<>: an info object is an OpenMP
  /// region info iff its kind is CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  /// \brief Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, Stmt *S) override;

  /// \brief Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

private:
  /// \brief A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// \brief OpenMP executable directive associated with the region.
  const OMPExecutableDirective &Directive;
};
} // namespace

/// \brief Builds an lvalue over the local storage of ThreadIDVar. The lvalue
/// is typed as a pointer to ThreadIDVar's declared type.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.MakeNaturalAlignAddrLValue(
      CGF.GetAddrOfLocalVar(ThreadIDVar),
      CGF.getContext().getPointerType(ThreadIDVar->getType()));
}

/// \brief Emits the private and firstprivate clauses of the directive, then
/// the captured statement body itself via the base class.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, Stmt *S) {
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  CGF.EmitOMPPrivateClause(Directive, PrivateScope);
  CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
  // The barrier is emitted only when Privatize() reports true.
  if (PrivateScope.Privatize())
    // Emit implicit barrier to synchronize threads and avoid data races.
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Directive.getLocStart(),
                                               /*IsExplicit=*/false);
  CGCapturedStmtInfo::EmitBody(CGF, S);
}

/// \brief Creates the LLVM types shared by all runtime calls: the ident_t
/// struct, the kmpc_micro function type and the kmp_critical_name array type.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), DefaultOpenMPPSource(nullptr) {
  // ident_t is four 32-bit integers followed by an i8* (psource).
  IdentTy = llvm::StructType::create(
      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
      CGM.Int8PtrTy /* psource */, nullptr);
  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                               llvm::PointerType::getUnqual(CGM.Int32Ty)};
  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  // kmp_critical_name is modelled as an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
}

/// \brief Generates the outlined function for the captured statement
/// associated with directive \a D.
///
/// A fresh CodeGenFunction is used, with a CGOpenMPRegionInfo installed as its
/// CapturedStmtInfo so that \a ThreadIDVar is known while the body is emitted.
llvm::Value *
CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D,
                                      const VarDecl *ThreadIDVar) {
  const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
  CodeGenFunction CGF(CGM, true);
  CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar);
  CGF.CapturedStmtInfo = &CGInfo;
  return CGF.GenerateCapturedStmtFunction(*CS);
}

/// \brief Returns the constant global ident_t object for \a Flags, creating it
/// (and caching it in OpenMPDefaultLocMap) on first request.
llvm::Value *
CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }
    // The global is created without an initializer; it is set below once the
    // field values are built.
    auto DefaultOpenMPLocation = new llvm::GlobalVariable(
        CGM.getModule(), IdentTy, /*isConstant*/ true,
        llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
    DefaultOpenMPLocation->setUnnamedAddr(true);

    // Field order matches IdentTy: reserved_1, flags, reserved_2, reserved_3,
    // psource.
    llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
    llvm::Constant *Values[] = {Zero,
                                llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                                Zero, Zero, DefaultOpenMPPSource};
    llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
    DefaultOpenMPLocation->setInitializer(Init);
    OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
    return DefaultOpenMPLocation;
  }
  return Entry;
}

/// \brief Returns an ident_t value describing \a Loc for use as the 'loc'
/// argument of a runtime call.
///
/// Without debug info, or for an invalid \a Loc, this is the shared default
/// location for \a Flags. Otherwise a per-function ident_t temporary is used
/// and its psource field is overwritten with a string built from \a Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 OpenMPLocationFlags Flags) {
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags);

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *LocValue = nullptr;
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = I->second.DebugLoc;
  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // getThreadID was called before this routine.
  if (LocValue == nullptr) {
    // Generate "ident_t .kmpc_loc.addr;"
    llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
    AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI;
    LocValue = AI;

    // Initialize the temporary from the default location at the alloca
    // insertion point; the guard restores the builder's insert point.
    // NOTE(review): Flags is consulted only when the temporary is first
    // created for this function; later calls reuse the cached temporary.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             llvm::ConstantExpr::getSizeOf(IdentTy),
                             CGM.PointerAlignInBytes);
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  auto *PSource =
      CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource);

  // Location strings are cached by the raw encoding of the source location.
  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function field is left empty when the current declaration is not a
    // FunctionDecl.
    if (const FunctionDecl *FD =
            dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
      OS2 << FD->getQualifiedNameAsString();
    }
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.Builder.CreateStore(OMPDebugLoc, PSource);

  return LocValue;
}

/// \brief Returns the global thread id value for the current function.
///
/// A value cached in OpenMPLocThreadIDMap is reused when present. Inside an
/// OpenMP region the id is loaded through the region's thread-id variable;
/// elsewhere it is obtained by calling __kmpc_global_thread_num.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  if (auto OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Check if this is an outlined function with thread id passed as argument.
    auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable();
    auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
    // Two loads: the first yields the address held in the variable's storage,
    // the second loads the thread id through that address.
    auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
    LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
                                          ThreadIDVar->getType());
    ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
    // If value loaded in entry block, cache it and use it everywhere in
    // function.
    if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
      auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
      Elem.second.ThreadID = ThreadID;
    }
  } else {
    // This is not an outlined function region - need to call kmp_int32
    // __kmpc_global_thread_num(ident_t *loc).
    // Generate thread id value and cache this value for use across the
    // function.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
    ThreadID = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
        emitUpdateLocation(CGF, Loc));
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.ThreadID = ThreadID;
  }
  return ThreadID;
}

/// \brief Drops the cached location/thread-id entry of the function that
/// \a CGF has just finished emitting, if there is one.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn))
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
}

/// \brief Returns the type 'ident_t *'.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return llvm::PointerType::getUnqual(IdentTy);
}

/// \brief Returns a pointer to the kmpc_micro function type.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

/// \brief Returns the declaration of the OpenMP runtime entry point selected
/// by \a Function, built from the prototype given in each case's comment.
llvm::Constant *
CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
  llvm::Constant *RTLFn = nullptr;
  switch (Function) {
  case OMPRTL__kmpc_fork_call: {
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    break;
  }
  case OMPRTL__kmpc_global_thread_num: {
    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
    break;
  }
  case OMPRTL__kmpc_threadprivate_cached: {
    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy, CGM.SizeTy,
                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
    break;
  }
  case OMPRTL__kmpc_critical: {
    // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
    break;
  }
  case OMPRTL__kmpc_threadprivate_register: {
    // Build void __kmpc_threadprivate_register(ident_t *, void *data,
    // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
    // typedef void *(*kmpc_ctor)(void *);
    auto KmpcCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void *(*kmpc_cctor)(void *, void *);
    llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto KmpcCopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void (*kmpc_dtor)(void *);
    auto KmpcDtorTy =
        llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
            ->getPointerTo();
    llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
                              KmpcCopyCtorTy, KmpcDtorTy};
    auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
                                        /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
    break;
  }
  case OMPRTL__kmpc_end_critical: {
    // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
    break;
  }
  case OMPRTL__kmpc_cancel_barrier: {
    // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
    break;
  }
  // Build __kmpc_for_static_init*(
  //               ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //               kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //               kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //               kmp_int[32|64] incr, kmp_int[32|64] chunk);
  // The four cases below differ only in the iteration type ITy (32- or 64-bit)
  // and in the name of the runtime function.
  case OMPRTL__kmpc_for_static_init_4: {
    auto ITy = CGM.Int32Ty;
    auto PtrTy = llvm::PointerType::getUnqual(ITy);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(),                     // loc
        CGM.Int32Ty,                               // tid
        CGM.Int32Ty,                               // schedtype
        llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
        PtrTy,                                     // p_lower
        PtrTy,                                     // p_upper
        PtrTy,                                     // p_stride
        ITy,                                       // incr
        ITy                                        // chunk
    };
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4");
    break;
  }
  case OMPRTL__kmpc_for_static_init_4u: {
    auto ITy = CGM.Int32Ty;
    auto PtrTy = llvm::PointerType::getUnqual(ITy);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(),                     // loc
        CGM.Int32Ty,                               // tid
        CGM.Int32Ty,                               // schedtype
        llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
        PtrTy,                                     // p_lower
        PtrTy,                                     // p_upper
        PtrTy,                                     // p_stride
        ITy,                                       // incr
        ITy                                        // chunk
    };
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u");
    break;
  }
  case OMPRTL__kmpc_for_static_init_8: {
    auto ITy = CGM.Int64Ty;
    auto PtrTy = llvm::PointerType::getUnqual(ITy);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(),                     // loc
        CGM.Int32Ty,                               // tid
        CGM.Int32Ty,                               // schedtype
        llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
        PtrTy,                                     // p_lower
        PtrTy,                                     // p_upper
        PtrTy,                                     // p_stride
        ITy,                                       // incr
        ITy                                        // chunk
    };
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8");
    break;
  }
  case OMPRTL__kmpc_for_static_init_8u: {
    auto ITy = CGM.Int64Ty;
    auto PtrTy = llvm::PointerType::getUnqual(ITy);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(),                     // loc
        CGM.Int32Ty,                               // tid
        CGM.Int32Ty,                               // schedtype
        llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
        PtrTy,                                     // p_lower
        PtrTy,                                     // p_upper
        PtrTy,                                     // p_stride
        ITy,                                       // incr
        ITy                                        // chunk
    };
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u");
    break;
  }
  case OMPRTL__kmpc_for_static_fini: {
    // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
    break;
  }
  case OMPRTL__kmpc_push_num_threads: {
    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_threads)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
    break;
  }
  case OMPRTL__kmpc_serialized_parallel: {
    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_end_serialized_parallel: {
    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_flush: {
    // Build void __kmpc_flush(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
    break;
  }
  case OMPRTL__kmpc_master: {
    // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
    break;
  }
  case OMPRTL__kmpc_end_master: {
    // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
    break;
  }
  case OMPRTL__kmpc_omp_taskyield: {
    // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
    // int end_part);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
    break;
  }
  case OMPRTL__kmpc_single: {
    // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
    break;
  }
  case OMPRTL__kmpc_end_single: {
    // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
    break;
  }
  }
  // RTLFn is still null here if Function matched none of the cases above.
  return RTLFn;
}

/// \brief Returns the internal variable used as the runtime cache for the
/// threadprivate variable \a VD; it is named "<mangled name>.cache.".
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  // Lookup the entry, lazily creating it if necessary.
  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                     Twine(CGM.getMangledName(VD)) + ".cache.");
}

/// \brief Emits a call to __kmpc_threadprivate_cached for variable \a VD whose
/// original address is \a VDAddr, and returns the result of that call.
llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                     const VarDecl *VD,
                                                     llvm::Value *VDAddr,
                                                     SourceLocation Loc) {
  auto VarTy = VDAddr->getType()->getPointerElementType();
  // Arguments in prototype order: loc, global_tid, data, size, cache.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
}

/// \brief Emits the runtime calls that register \a Ctor, \a CopyCtor and
/// \a Dtor for the threadprivate variable stored at \a VDAddr.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  auto OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {OMPLoc,
                         CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
                         Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

/// \brief Emits the constructor/destructor helpers of threadprivate variable
/// \a VD and registers them with the runtime.
///
/// Nothing is emitted when \a VD has no definition or was already handled
/// (tracked in ThreadPrivateWithDefinition), or when neither a constructor
/// nor a destructor helper is needed.
///
/// \returns the newly created ".__omp_threadprivate_init_." function when
/// \a CGF is null; nullptr in every other case, including when the
/// registration is emitted directly into \a CGF.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
    ThreadPrivateWithDefinition.insert(VD);
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    // NOTE(review): Init is dereferenced without a null check in the branch
    // below; presumably PerformInit implies an initializer exists - confirm
    // against callers.
    auto Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_ctor_.", Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, SourceLocation());
      // Load the incoming void* argument and cast it to a pointer to the
      // variable's type so the initializer can be emitted into it.
      auto ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.PointerAlignInBytes,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      auto Arg = CtorCGF.Builder.CreatePointerCast(
          ArgVal,
          CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the argument it was given: reload it and store it
      // into the return slot.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.PointerAlignInBytes,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
      Args.push_back(&Dst);

      auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
          CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
          /*isVariadic=*/false);
      auto FTy = CGM.getTypes().GetFunctionType(FI);
      auto Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, ".__kmpc_global_dtor_.", Loc);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            SourceLocation());
      auto ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.PointerAlignInBytes,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(ArgVal, ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto CopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                /*isVarArg=*/false)->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A missing constructor or destructor is passed to the runtime as a null
    // pointer of the matching function-pointer type.
    if (Ctor == nullptr) {
      auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                            /*isVarArg=*/false)->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: emit the registration into a
      // separate init function and hand that function back to the caller.
      auto InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, ".__omp_threadprivate_init_.");
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// \brief Emits a call to __kmpc_fork_call that passes \a OutlinedFn as the
/// microtask and \a CapturedStruct as its single extra argument.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Value *OutlinedFn,
                                       llvm::Value *CapturedStruct) {
  // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument
      // (there is only one additional argument - 'context')
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
      CGF.EmitCastToVoidPtr(CapturedStruct)};
  auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// \brief Emits a direct call of \a OutlinedFn bracketed by
/// __kmpc_serialized_parallel and __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     llvm::Value *OutlinedFn,
                                     llvm::Value *CapturedStruct) {
  auto ThreadID = getThreadID(CGF, Loc);
  // Build calls:
  // __kmpc_serialized_parallel(&Loc, GTid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), ThreadID};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
                      Args);

  // OutlinedFn(&gtid, &zero, CapturedStruct);
  auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
  auto Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  // The second argument is the address of a 32-bit temporary holding zero.
  auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
  llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
  CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);

  // __kmpc_end_serialized_parallel(&Loc, GTid);
  llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary
// and return the address of that temp.
llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                                  SourceLocation Loc) {
  // Inside an OpenMP region: load the address held in the region's thread-id
  // variable and return it directly.
  if (auto OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF),
                                SourceLocation()).getScalarVal();
  // Otherwise store the thread id into a fresh 32-bit temporary and return
  // the address of that temporary.
  auto ThreadID = getThreadID(CGF, Loc);
  auto Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// \brief Returns the internal global variable called \a Name, creating it
/// with type \a Ty on first request.
///
/// Variables are cached by name in InternalVars. Asking again for an existing
/// name with a different \a Ty trips the assertion below.
llvm::Constant *
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
                                             const llvm::Twine &Name) {
  // Render the Twine into a buffer so it can be used as the map key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  auto RuntimeName = Out.str();
  // insert() leaves an existing entry untouched, so Elem.second is non-null
  // exactly when the variable was created earlier.
  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // New variable: common linkage, zero-initialized, named after the map key.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first());
}

/// \brief Returns the lock variable for the critical region \a CriticalName;
/// it is named ".gomp_critical_user_<CriticalName>.var".
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  llvm::Twine Name(".gomp_critical_user_", CriticalName);
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
}

/// \brief Emits the code produced by \a CriticalOpGen between calls to
/// __kmpc_critical and __kmpc_end_critical on the lock for \a CriticalName.
void CGOpenMPRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const std::function<void()> &CriticalOpGen, SourceLocation Loc) {
  auto RegionLock = getCriticalRegionLock(CriticalName);
  // __kmpc_critical(ident_t *, gtid, Lock);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         RegionLock};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
  CriticalOpGen();
  // Build a call to __kmpc_end_critical
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
}

/// \brief Emits "if (IfCond) { <body> }", where the body is whatever
/// \a BodyOpGen emits. \a IfCond is converted from a signed 32-bit integer to
/// bool before the branch.
static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
                       const std::function<void()> &BodyOpGen) {
  llvm::Value *CallBool = CGF.EmitScalarConversion(
      IfCond,
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
      CGF.getContext().BoolTy);

  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
  auto *ContBlock = CGF.createBasicBlock("omp_if.end");
  // Generate the branch (If-stmt)
  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
  CGF.EmitBlock(ThenBlock);
  BodyOpGen();
  // Emit the rest of bblocks/branches
  CGF.EmitBranch(ContBlock);
  CGF.EmitBlock(ContBlock, true);
}

/// \brief Emits a 'master' region: the code produced by \a MasterOpGen runs
/// only when __kmpc_master returns non-zero, and is followed by
/// __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const std::function<void()> &MasterOpGen,
                                       SourceLocation Loc) {
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  auto *IsMaster =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
  emitIfStmt(CGF, IsMaster, [&]() -> void {
    MasterOpGen();
    // Build a call to __kmpc_end_master.
    // OpenMP [1.2.2 OpenMP Language Terminology]
    // For C/C++, an executable statement, possibly compound, with a single
    // entry at the top and a single exit at the bottom, or an OpenMP construct.
    // * Access to the structured block must not be the result of a branch.
    // * The point of exit cannot be a branch out of the structured block.
    // * The point of entry must not be a call to setjmp().
    // * longjmp() and throw() must not violate the entry/exit criteria.
    // * An expression statement, iteration statement, selection statement, or
    // try block is considered to be a structured block if the corresponding
    // compound statement obtained by enclosing it in { and } would be a
    // structured block.
    // It is analyzed in Sema, so we can just call __kmpc_end_master() on
    // fallthrough rather than pushing a normal cleanup for it.
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_master), Args);
  });
}

/// \brief Emits a call to __kmpc_omp_taskyield with end_part set to 0.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
}

/// \brief Emits a 'single' region: the code produced by \a SingleOpGen runs
/// only when __kmpc_single returns non-zero, and is followed by
/// __kmpc_end_single.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const std::function<void()> &SingleOpGen,
                                       SourceLocation Loc) {
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  auto *IsSingle =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
  emitIfStmt(CGF, IsSingle, [&]() -> void {
    SingleOpGen();
    // Build a call to __kmpc_end_single.
    // OpenMP [1.2.2 OpenMP Language Terminology]
    // For C/C++, an executable statement, possibly compound, with a single
    // entry at the top and a single exit at the bottom, or an OpenMP construct.
    // * Access to the structured block must not be the result of a branch.
    // * The point of exit cannot be a branch out of the structured block.
    // * The point of entry must not be a call to setjmp().
    // * longjmp() and throw() must not violate the entry/exit criteria.
    // * An expression statement, iteration statement, selection statement, or
    // try block is considered to be a structured block if the corresponding
    // compound statement obtained by enclosing it in { and } would be a
    // structured block.
    // It is analyzed in Sema, so we can just call __kmpc_end_single() on
    // fallthrough rather than pushing a normal cleanup for it.
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), Args);
  });
}

/// \brief Emits a barrier at \a Loc. \a IsExplicit selects which barrier flag
/// (explicit or implicit) is recorded in the location passed to the runtime.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      bool IsExplicit) {
  // Pick the ident_t flags that mark the barrier as explicit or implicit.
  auto Flags = static_cast<OpenMPLocationFlags>(
      OMP_IDENT_KMPC |
      (IsExplicit ? OMP_IDENT_BARRIER_EXPL : OMP_IDENT_BARRIER_IMPL));
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
  // one provides the same functionality and adds initial support for
  // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
  // is provided by default by the runtime library so it is safe to make such
  // replacement.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
}

/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
877 OMP_ord_lower = 64, 878 /// \brief Lower bound for 'nomerge' versions. 879 OMP_nm_lower = 160, 880 }; 881 882 /// \brief Map the OpenMP loop schedule to the runtime enumeration. 883 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 884 bool Chunked) { 885 switch (ScheduleKind) { 886 case OMPC_SCHEDULE_static: 887 return Chunked ? OMP_sch_static_chunked : OMP_sch_static; 888 case OMPC_SCHEDULE_dynamic: 889 return OMP_sch_dynamic_chunked; 890 case OMPC_SCHEDULE_guided: 891 return OMP_sch_guided_chunked; 892 case OMPC_SCHEDULE_auto: 893 return OMP_sch_auto; 894 case OMPC_SCHEDULE_runtime: 895 return OMP_sch_runtime; 896 case OMPC_SCHEDULE_unknown: 897 assert(!Chunked && "chunk was specified but schedule kind not known"); 898 return OMP_sch_static; 899 } 900 llvm_unreachable("Unexpected runtime schedule"); 901 } 902 903 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 904 bool Chunked) const { 905 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 906 return Schedule == OMP_sch_static; 907 } 908 909 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 910 auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false); 911 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 912 return Schedule != OMP_sch_static; 913 } 914 915 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc, 916 OpenMPScheduleClauseKind ScheduleKind, 917 unsigned IVSize, bool IVSigned, 918 llvm::Value *IL, llvm::Value *LB, 919 llvm::Value *UB, llvm::Value *ST, 920 llvm::Value *Chunk) { 921 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr); 922 // Call __kmpc_for_static_init( 923 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 924 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 925 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 926 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 927 // TODO: Implement dynamic 
schedule. 928 929 // If the Chunk was not specified in the clause - use default value 1. 930 if (Chunk == nullptr) 931 Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1); 932 933 llvm::Value *Args[] = { 934 emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc), 935 CGF.Builder.getInt32(Schedule), // Schedule type 936 IL, // &isLastIter 937 LB, // &LB 938 UB, // &UB 939 ST, // &Stride 940 CGF.Builder.getIntN(IVSize, 1), // Incr 941 Chunk // Chunk 942 }; 943 assert((IVSize == 32 || IVSize == 64) && 944 "Index size is not compatible with the omp runtime"); 945 auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4 946 : OMPRTL__kmpc_for_static_init_4u) 947 : (IVSigned ? OMPRTL__kmpc_for_static_init_8 948 : OMPRTL__kmpc_for_static_init_8u); 949 CGF.EmitRuntimeCall(createRuntimeFunction(F), Args); 950 } 951 952 void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc, 953 OpenMPScheduleClauseKind ScheduleKind) { 954 assert((ScheduleKind == OMPC_SCHEDULE_static || 955 ScheduleKind == OMPC_SCHEDULE_unknown) && 956 "Non-static schedule kinds are not yet implemented"); 957 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 958 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), 959 getThreadID(CGF, Loc)}; 960 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 961 Args); 962 } 963 964 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 965 llvm::Value *NumThreads, 966 SourceLocation Loc) { 967 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 968 llvm::Value *Args[] = { 969 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 970 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 971 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 972 Args); 973 } 974 975 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 976 SourceLocation Loc) { 977 // Build call void __kmpc_flush(ident_t *loc) 978 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 979 emitUpdateLocation(CGF, Loc)); 980 } 981 982