1 //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// \brief This file implements semantic analysis for CUDA constructs. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/AST/ASTContext.h" 15 #include "clang/AST/Decl.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/Lex/Preprocessor.h" 18 #include "clang/Sema/Lookup.h" 19 #include "clang/Sema/Sema.h" 20 #include "clang/Sema/SemaDiagnostic.h" 21 #include "clang/Sema/SemaInternal.h" 22 #include "clang/Sema/Template.h" 23 #include "llvm/ADT/Optional.h" 24 #include "llvm/ADT/SmallVector.h" 25 using namespace clang; 26 27 void Sema::PushForceCUDAHostDevice() { 28 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 29 ForceCUDAHostDeviceDepth++; 30 } 31 32 bool Sema::PopForceCUDAHostDevice() { 33 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 34 if (ForceCUDAHostDeviceDepth == 0) 35 return false; 36 ForceCUDAHostDeviceDepth--; 37 return true; 38 } 39 40 ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, 41 MultiExprArg ExecConfig, 42 SourceLocation GGGLoc) { 43 FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl(); 44 if (!ConfigDecl) 45 return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use) 46 << "cudaConfigureCall"); 47 QualType ConfigQTy = ConfigDecl->getType(); 48 49 DeclRefExpr *ConfigDR = new (Context) 50 DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc); 51 MarkFunctionReferenced(LLLLoc, ConfigDecl); 52 53 return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr, 54 /*IsExecConfig=*/true); 55 } 56 57 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const AttributeList *Attr) { 58 bool HasHostAttr = false; 59 bool HasDeviceAttr = false; 60 bool HasGlobalAttr = false; 61 bool HasInvalidTargetAttr = false; 62 while (Attr) { 63 switch(Attr->getKind()){ 64 case AttributeList::AT_CUDAGlobal: 65 HasGlobalAttr = true; 66 break; 67 case AttributeList::AT_CUDAHost: 68 HasHostAttr = true; 69 break; 70 case AttributeList::AT_CUDADevice: 71 HasDeviceAttr = true; 72 break; 73 case AttributeList::AT_CUDAInvalidTarget: 74 HasInvalidTargetAttr = true; 75 break; 76 default: 77 break; 78 } 79 Attr = Attr->getNext(); 80 } 81 if (HasInvalidTargetAttr) 82 return CFT_InvalidTarget; 83 84 if (HasGlobalAttr) 85 return CFT_Global; 86 87 if (HasHostAttr && HasDeviceAttr) 88 return CFT_HostDevice; 89 90 if (HasDeviceAttr) 91 return CFT_Device; 92 93 return CFT_Host; 94 } 95 96 template <typename A> 97 static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { 98 return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { 99 return isa<A>(Attribute) && 100 !(IgnoreImplicitAttr && Attribute->isImplicit()); 101 }); 102 } 103 104 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function 105 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, 106 bool IgnoreImplicitHDAttr) { 107 // Code that lives outside a function is run on the host. 108 if (D == nullptr) 109 return CFT_Host; 110 111 if (D->hasAttr<CUDAInvalidTargetAttr>()) 112 return CFT_InvalidTarget; 113 114 if (D->hasAttr<CUDAGlobalAttr>()) 115 return CFT_Global; 116 117 if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) { 118 if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) 119 return CFT_HostDevice; 120 return CFT_Device; 121 } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) { 122 return CFT_Host; 123 } else if (D->isImplicit() && !IgnoreImplicitHDAttr) { 124 // Some implicit declarations (like intrinsic functions) are not marked. 125 // Set the most lenient target on them for maximal flexibility. 126 return CFT_HostDevice; 127 } 128 129 return CFT_Host; 130 } 131 132 // * CUDA Call preference table 133 // 134 // F - from, 135 // T - to 136 // Ph - preference in host mode 137 // Pd - preference in device mode 138 // H - handled in (x) 139 // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never. 140 // 141 // | F | T | Ph | Pd | H | 142 // |----+----+-----+-----+-----+ 143 // | d | d | N | N | (c) | 144 // | d | g | -- | -- | (a) | 145 // | d | h | -- | -- | (e) | 146 // | d | hd | HD | HD | (b) | 147 // | g | d | N | N | (c) | 148 // | g | g | -- | -- | (a) | 149 // | g | h | -- | -- | (e) | 150 // | g | hd | HD | HD | (b) | 151 // | h | d | -- | -- | (e) | 152 // | h | g | N | N | (c) | 153 // | h | h | N | N | (c) | 154 // | h | hd | HD | HD | (b) | 155 // | hd | d | WS | SS | (d) | 156 // | hd | g | SS | -- |(d/a)| 157 // | hd | h | SS | WS | (d) | 158 // | hd | hd | HD | HD | (b) | 159 160 Sema::CUDAFunctionPreference 161 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller, 162 const FunctionDecl *Callee) { 163 assert(Callee && "Callee must be valid."); 164 CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller); 165 CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee); 166 167 // If one of the targets is invalid, the check always fails, no matter what 168 // the other target is. 169 if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget) 170 return CFP_Never; 171 172 // (a) Can't call global from some contexts until we support CUDA's 173 // dynamic parallelism. 174 if (CalleeTarget == CFT_Global && 175 (CallerTarget == CFT_Global || CallerTarget == CFT_Device)) 176 return CFP_Never; 177 178 // (b) Calling HostDevice is OK for everyone. 179 if (CalleeTarget == CFT_HostDevice) 180 return CFP_HostDevice; 181 182 // (c) Best case scenarios 183 if (CalleeTarget == CallerTarget || 184 (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) || 185 (CallerTarget == CFT_Global && CalleeTarget == CFT_Device)) 186 return CFP_Native; 187 188 // (d) HostDevice behavior depends on compilation mode. 189 if (CallerTarget == CFT_HostDevice) { 190 // It's OK to call a compilation-mode matching function from an HD one. 191 if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) || 192 (!getLangOpts().CUDAIsDevice && 193 (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))) 194 return CFP_SameSide; 195 196 // Calls from HD to non-mode-matching functions (i.e., to host functions 197 // when compiling in device mode or to device functions when compiling in 198 // host mode) are allowed at the sema level, but eventually rejected if 199 // they're ever codegened. TODO: Reject said calls earlier. 200 return CFP_WrongSide; 201 } 202 203 // (e) Calling across device/host boundary is not something you should do. 204 if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) || 205 (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) || 206 (CallerTarget == CFT_Global && CalleeTarget == CFT_Host)) 207 return CFP_Never; 208 209 llvm_unreachable("All cases should've been handled by now."); 210 } 211 212 void Sema::EraseUnwantedCUDAMatches( 213 const FunctionDecl *Caller, 214 SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { 215 if (Matches.size() <= 1) 216 return; 217 218 using Pair = std::pair<DeclAccessPair, FunctionDecl*>; 219 220 // Gets the CUDA function preference for a call from Caller to Match. 221 auto GetCFP = [&](const Pair &Match) { 222 return IdentifyCUDAPreference(Caller, Match.second); 223 }; 224 225 // Find the best call preference among the functions in Matches. 226 CUDAFunctionPreference BestCFP = GetCFP(*std::max_element( 227 Matches.begin(), Matches.end(), 228 [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); })); 229 230 // Erase all functions with lower priority. 231 llvm::erase_if(Matches, 232 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }); 233 } 234 235 /// When an implicitly-declared special member has to invoke more than one 236 /// base/field special member, conflicts may occur in the targets of these 237 /// members. For example, if one base's member __host__ and another's is 238 /// __device__, it's a conflict. 239 /// This function figures out if the given targets \param Target1 and 240 /// \param Target2 conflict, and if they do not it fills in 241 /// \param ResolvedTarget with a target that resolves for both calls. 242 /// \return true if there's a conflict, false otherwise. 243 static bool 244 resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1, 245 Sema::CUDAFunctionTarget Target2, 246 Sema::CUDAFunctionTarget *ResolvedTarget) { 247 // Only free functions and static member functions may be global. 248 assert(Target1 != Sema::CFT_Global); 249 assert(Target2 != Sema::CFT_Global); 250 251 if (Target1 == Sema::CFT_HostDevice) { 252 *ResolvedTarget = Target2; 253 } else if (Target2 == Sema::CFT_HostDevice) { 254 *ResolvedTarget = Target1; 255 } else if (Target1 != Target2) { 256 return true; 257 } else { 258 *ResolvedTarget = Target1; 259 } 260 261 return false; 262 } 263 264 bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, 265 CXXSpecialMember CSM, 266 CXXMethodDecl *MemberDecl, 267 bool ConstRHS, 268 bool Diagnose) { 269 llvm::Optional<CUDAFunctionTarget> InferredTarget; 270 271 // We're going to invoke special member lookup; mark that these special 272 // members are called from this one, and not from its caller. 273 ContextRAII MethodContext(*this, MemberDecl); 274 275 // Look for special members in base classes that should be invoked from here. 276 // Infer the target of this member base on the ones it should call. 277 // Skip direct and indirect virtual bases for abstract classes. 278 llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases; 279 for (const auto &B : ClassDecl->bases()) { 280 if (!B.isVirtual()) { 281 Bases.push_back(&B); 282 } 283 } 284 285 if (!ClassDecl->isAbstract()) { 286 for (const auto &VB : ClassDecl->vbases()) { 287 Bases.push_back(&VB); 288 } 289 } 290 291 for (const auto *B : Bases) { 292 const RecordType *BaseType = B->getType()->getAs<RecordType>(); 293 if (!BaseType) { 294 continue; 295 } 296 297 CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl()); 298 Sema::SpecialMemberOverloadResult SMOR = 299 LookupSpecialMember(BaseClassDecl, CSM, 300 /* ConstArg */ ConstRHS, 301 /* VolatileArg */ false, 302 /* RValueThis */ false, 303 /* ConstThis */ false, 304 /* VolatileThis */ false); 305 306 if (!SMOR.getMethod()) 307 continue; 308 309 CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod()); 310 if (!InferredTarget.hasValue()) { 311 InferredTarget = BaseMethodTarget; 312 } else { 313 bool ResolutionError = resolveCalleeCUDATargetConflict( 314 InferredTarget.getValue(), BaseMethodTarget, 315 InferredTarget.getPointer()); 316 if (ResolutionError) { 317 if (Diagnose) { 318 Diag(ClassDecl->getLocation(), 319 diag::note_implicit_member_target_infer_collision) 320 << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget; 321 } 322 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); 323 return true; 324 } 325 } 326 } 327 328 // Same as for bases, but now for special members of fields. 329 for (const auto *F : ClassDecl->fields()) { 330 if (F->isInvalidDecl()) { 331 continue; 332 } 333 334 const RecordType *FieldType = 335 Context.getBaseElementType(F->getType())->getAs<RecordType>(); 336 if (!FieldType) { 337 continue; 338 } 339 340 CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl()); 341 Sema::SpecialMemberOverloadResult SMOR = 342 LookupSpecialMember(FieldRecDecl, CSM, 343 /* ConstArg */ ConstRHS && !F->isMutable(), 344 /* VolatileArg */ false, 345 /* RValueThis */ false, 346 /* ConstThis */ false, 347 /* VolatileThis */ false); 348 349 if (!SMOR.getMethod()) 350 continue; 351 352 CUDAFunctionTarget FieldMethodTarget = 353 IdentifyCUDATarget(SMOR.getMethod()); 354 if (!InferredTarget.hasValue()) { 355 InferredTarget = FieldMethodTarget; 356 } else { 357 bool ResolutionError = resolveCalleeCUDATargetConflict( 358 InferredTarget.getValue(), FieldMethodTarget, 359 InferredTarget.getPointer()); 360 if (ResolutionError) { 361 if (Diagnose) { 362 Diag(ClassDecl->getLocation(), 363 diag::note_implicit_member_target_infer_collision) 364 << (unsigned)CSM << InferredTarget.getValue() 365 << FieldMethodTarget; 366 } 367 MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context)); 368 return true; 369 } 370 } 371 } 372 373 if (InferredTarget.hasValue()) { 374 if (InferredTarget.getValue() == CFT_Device) { 375 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 376 } else if (InferredTarget.getValue() == CFT_Host) { 377 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 378 } else { 379 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 380 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 381 } 382 } else { 383 // If no target was inferred, mark this member as __host__ __device__; 384 // it's the least restrictive option that can be invoked from any target. 385 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 386 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context)); 387 } 388 389 return false; 390 } 391 392 bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { 393 if (!CD->isDefined() && CD->isTemplateInstantiation()) 394 InstantiateFunctionDefinition(Loc, CD->getFirstDecl()); 395 396 // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered 397 // empty at a point in the translation unit, if it is either a 398 // trivial constructor 399 if (CD->isTrivial()) 400 return true; 401 402 // ... or it satisfies all of the following conditions: 403 // The constructor function has been defined. 404 // The constructor function has no parameters, 405 // and the function body is an empty compound statement. 406 if (!(CD->hasTrivialBody() && CD->getNumParams() == 0)) 407 return false; 408 409 // Its class has no virtual functions and no virtual base classes. 410 if (CD->getParent()->isDynamicClass()) 411 return false; 412 413 // The only form of initializer allowed is an empty constructor. 414 // This will recursively check all base classes and member initializers 415 if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) { 416 if (const CXXConstructExpr *CE = 417 dyn_cast<CXXConstructExpr>(CI->getInit())) 418 return isEmptyCudaConstructor(Loc, CE->getConstructor()); 419 return false; 420 })) 421 return false; 422 423 return true; 424 } 425 426 bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { 427 // No destructor -> no problem. 428 if (!DD) 429 return true; 430 431 if (!DD->isDefined() && DD->isTemplateInstantiation()) 432 InstantiateFunctionDefinition(Loc, DD->getFirstDecl()); 433 434 // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered 435 // empty at a point in the translation unit, if it is either a 436 // trivial constructor 437 if (DD->isTrivial()) 438 return true; 439 440 // ... or it satisfies all of the following conditions: 441 // The destructor function has been defined. 442 // and the function body is an empty compound statement. 443 if (!DD->hasTrivialBody()) 444 return false; 445 446 const CXXRecordDecl *ClassDecl = DD->getParent(); 447 448 // Its class has no virtual functions and no virtual base classes. 449 if (ClassDecl->isDynamicClass()) 450 return false; 451 452 // Only empty destructors are allowed. This will recursively check 453 // destructors for all base classes... 454 if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) { 455 if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl()) 456 return isEmptyCudaDestructor(Loc, RD->getDestructor()); 457 return true; 458 })) 459 return false; 460 461 // ... and member fields. 462 if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) { 463 if (CXXRecordDecl *RD = Field->getType() 464 ->getBaseElementTypeUnsafe() 465 ->getAsCXXRecordDecl()) 466 return isEmptyCudaDestructor(Loc, RD->getDestructor()); 467 return true; 468 })) 469 return false; 470 471 return true; 472 } 473 474 // With -fcuda-host-device-constexpr, an unattributed constexpr function is 475 // treated as implicitly __host__ __device__, unless: 476 // * it is a variadic function (device-side variadic functions are not 477 // allowed), or 478 // * a __device__ function with this signature was already declared, in which 479 // case in which case we output an error, unless the __device__ decl is in a 480 // system header, in which case we leave the constexpr function unattributed. 481 // 482 // In addition, all function decls are treated as __host__ __device__ when 483 // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a 484 // #pragma clang force_cuda_host_device_begin/end 485 // pair). 486 void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD, 487 const LookupResult &Previous) { 488 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 489 490 if (ForceCUDAHostDeviceDepth > 0) { 491 if (!NewD->hasAttr<CUDAHostAttr>()) 492 NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); 493 if (!NewD->hasAttr<CUDADeviceAttr>()) 494 NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 495 return; 496 } 497 498 if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() || 499 NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() || 500 NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>()) 501 return; 502 503 // Is D a __device__ function with the same signature as NewD, ignoring CUDA 504 // attributes? 505 auto IsMatchingDeviceFn = [&](NamedDecl *D) { 506 if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D)) 507 D = Using->getTargetDecl(); 508 FunctionDecl *OldD = D->getAsFunction(); 509 return OldD && OldD->hasAttr<CUDADeviceAttr>() && 510 !OldD->hasAttr<CUDAHostAttr>() && 511 !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false, 512 /* ConsiderCudaAttrs = */ false); 513 }; 514 auto It = llvm::find_if(Previous, IsMatchingDeviceFn); 515 if (It != Previous.end()) { 516 // We found a __device__ function with the same name and signature as NewD 517 // (ignoring CUDA attrs). This is an error unless that function is defined 518 // in a system header, in which case we simply return without making NewD 519 // host+device. 520 NamedDecl *Match = *It; 521 if (!getSourceManager().isInSystemHeader(Match->getLocation())) { 522 Diag(NewD->getLocation(), 523 diag::err_cuda_unattributed_constexpr_cannot_overload_device) 524 << NewD->getName(); 525 Diag(Match->getLocation(), 526 diag::note_cuda_conflicting_device_function_declared_here); 527 } 528 return; 529 } 530 531 NewD->addAttr(CUDAHostAttr::CreateImplicit(Context)); 532 NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 533 } 534 535 // In CUDA, there are some constructs which may appear in semantically-valid 536 // code, but trigger errors if we ever generate code for the function in which 537 // they appear. Essentially every construct you're not allowed to use on the 538 // device falls into this category, because you are allowed to use these 539 // constructs in a __host__ __device__ function, but only if that function is 540 // never codegen'ed on the device. 541 // 542 // To handle semantic checking for these constructs, we keep track of the set of 543 // functions we know will be emitted, either because we could tell a priori that 544 // they would be emitted, or because they were transitively called by a 545 // known-emitted function. 546 // 547 // We also keep a partial call graph of which not-known-emitted functions call 548 // which other not-known-emitted functions. 549 // 550 // When we see something which is illegal if the current function is emitted 551 // (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or 552 // CheckCUDACall), we first check if the current function is known-emitted. If 553 // so, we immediately output the diagnostic. 554 // 555 // Otherwise, we "defer" the diagnostic. It sits in Sema::CUDADeferredDiags 556 // until we discover that the function is known-emitted, at which point we take 557 // it out of this map and emit the diagnostic. 558 559 Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc, 560 unsigned DiagID, FunctionDecl *Fn, 561 Sema &S) 562 : S(S), Loc(Loc), DiagID(DiagID), Fn(Fn), 563 ShowCallStack(K == K_ImmediateWithCallStack || K == K_Deferred) { 564 switch (K) { 565 case K_Nop: 566 break; 567 case K_Immediate: 568 case K_ImmediateWithCallStack: 569 ImmediateDiag.emplace(S.Diag(Loc, DiagID)); 570 break; 571 case K_Deferred: 572 assert(Fn && "Must have a function to attach the deferred diag to."); 573 PartialDiag.emplace(S.PDiag(DiagID)); 574 break; 575 } 576 } 577 578 // Print notes showing how we can reach FD starting from an a priori 579 // known-callable function. 580 static void EmitCallStackNotes(Sema &S, FunctionDecl *FD) { 581 auto FnIt = S.CUDAKnownEmittedFns.find(FD); 582 while (FnIt != S.CUDAKnownEmittedFns.end()) { 583 DiagnosticBuilder Builder( 584 S.Diags.Report(FnIt->second.Loc, diag::note_called_by)); 585 Builder << FnIt->second.FD; 586 Builder.setForceEmit(); 587 588 FnIt = S.CUDAKnownEmittedFns.find(FnIt->second.FD); 589 } 590 } 591 592 Sema::CUDADiagBuilder::~CUDADiagBuilder() { 593 if (ImmediateDiag) { 594 // Emit our diagnostic and, if it was a warning or error, output a callstack 595 // if Fn isn't a priori known-emitted. 596 bool IsWarningOrError = S.getDiagnostics().getDiagnosticLevel( 597 DiagID, Loc) >= DiagnosticsEngine::Warning; 598 ImmediateDiag.reset(); // Emit the immediate diag. 599 if (IsWarningOrError && ShowCallStack) 600 EmitCallStackNotes(S, Fn); 601 } else if (PartialDiag) { 602 assert(ShowCallStack && "Must always show call stack for deferred diags."); 603 S.CUDADeferredDiags[Fn].push_back({Loc, std::move(*PartialDiag)}); 604 } 605 } 606 607 // Do we know that we will eventually codegen the given function? 608 static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) { 609 // Templates are emitted when they're instantiated. 610 if (FD->isDependentContext()) 611 return false; 612 613 // When compiling for device, host functions are never emitted. Similarly, 614 // when compiling for host, device and global functions are never emitted. 615 // (Technically, we do emit a host-side stub for global functions, but this 616 // doesn't count for our purposes here.) 617 Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD); 618 if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host) 619 return false; 620 if (!S.getLangOpts().CUDAIsDevice && 621 (T == Sema::CFT_Device || T == Sema::CFT_Global)) 622 return false; 623 624 // Check whether this function is externally visible -- if so, it's 625 // known-emitted. 626 // 627 // We have to check the GVA linkage of the function's *definition* -- if we 628 // only have a declaration, we don't know whether or not the function will be 629 // emitted, because (say) the definition could include "inline". 630 FunctionDecl *Def = FD->getDefinition(); 631 632 if (Def && 633 !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def))) 634 return true; 635 636 // Otherwise, the function is known-emitted if it's in our set of 637 // known-emitted functions. 638 return S.CUDAKnownEmittedFns.count(FD) > 0; 639 } 640 641 Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, 642 unsigned DiagID) { 643 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 644 CUDADiagBuilder::Kind DiagKind = [&] { 645 switch (CurrentCUDATarget()) { 646 case CFT_Global: 647 case CFT_Device: 648 return CUDADiagBuilder::K_Immediate; 649 case CFT_HostDevice: 650 // An HD function counts as host code if we're compiling for host, and 651 // device code if we're compiling for device. Defer any errors in device 652 // mode until the function is known-emitted. 653 if (getLangOpts().CUDAIsDevice) { 654 return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) 655 ? CUDADiagBuilder::K_ImmediateWithCallStack 656 : CUDADiagBuilder::K_Deferred; 657 } 658 return CUDADiagBuilder::K_Nop; 659 660 default: 661 return CUDADiagBuilder::K_Nop; 662 } 663 }(); 664 return CUDADiagBuilder(DiagKind, Loc, DiagID, 665 dyn_cast<FunctionDecl>(CurContext), *this); 666 } 667 668 Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, 669 unsigned DiagID) { 670 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 671 CUDADiagBuilder::Kind DiagKind = [&] { 672 switch (CurrentCUDATarget()) { 673 case CFT_Host: 674 return CUDADiagBuilder::K_Immediate; 675 case CFT_HostDevice: 676 // An HD function counts as host code if we're compiling for host, and 677 // device code if we're compiling for device. Defer any errors in device 678 // mode until the function is known-emitted. 679 if (getLangOpts().CUDAIsDevice) 680 return CUDADiagBuilder::K_Nop; 681 682 return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) 683 ? CUDADiagBuilder::K_ImmediateWithCallStack 684 : CUDADiagBuilder::K_Deferred; 685 default: 686 return CUDADiagBuilder::K_Nop; 687 } 688 }(); 689 return CUDADiagBuilder(DiagKind, Loc, DiagID, 690 dyn_cast<FunctionDecl>(CurContext), *this); 691 } 692 693 // Emit any deferred diagnostics for FD and erase them from the map in which 694 // they're stored. 695 static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) { 696 auto It = S.CUDADeferredDiags.find(FD); 697 if (It == S.CUDADeferredDiags.end()) 698 return; 699 bool HasWarningOrError = false; 700 for (PartialDiagnosticAt &PDAt : It->second) { 701 const SourceLocation &Loc = PDAt.first; 702 const PartialDiagnostic &PD = PDAt.second; 703 HasWarningOrError |= S.getDiagnostics().getDiagnosticLevel( 704 PD.getDiagID(), Loc) >= DiagnosticsEngine::Warning; 705 DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID())); 706 Builder.setForceEmit(); 707 PD.Emit(Builder); 708 } 709 S.CUDADeferredDiags.erase(It); 710 711 // FIXME: Should this be called after every warning/error emitted in the loop 712 // above, instead of just once per function? That would be consistent with 713 // how we handle immediate errors, but it also seems like a bit much. 714 if (HasWarningOrError) 715 EmitCallStackNotes(S, FD); 716 } 717 718 // Indicate that this function (and thus everything it transtively calls) will 719 // be codegen'ed, and emit any deferred diagnostics on this function and its 720 // (transitive) callees. 721 static void MarkKnownEmitted(Sema &S, FunctionDecl *OrigCaller, 722 FunctionDecl *OrigCallee, SourceLocation OrigLoc) { 723 // Nothing to do if we already know that FD is emitted. 724 if (IsKnownEmitted(S, OrigCallee)) { 725 assert(!S.CUDACallGraph.count(OrigCallee)); 726 return; 727 } 728 729 // We've just discovered that OrigCallee is known-emitted. Walk our call 730 // graph to see what else we can now discover also must be emitted. 731 732 struct CallInfo { 733 FunctionDecl *Caller; 734 FunctionDecl *Callee; 735 SourceLocation Loc; 736 }; 737 llvm::SmallVector<CallInfo, 4> Worklist = {{OrigCaller, OrigCallee, OrigLoc}}; 738 llvm::SmallSet<CanonicalDeclPtr<FunctionDecl>, 4> Seen; 739 Seen.insert(OrigCallee); 740 while (!Worklist.empty()) { 741 CallInfo C = Worklist.pop_back_val(); 742 assert(!IsKnownEmitted(S, C.Callee) && 743 "Worklist should not contain known-emitted functions."); 744 S.CUDAKnownEmittedFns[C.Callee] = {C.Caller, C.Loc}; 745 EmitDeferredDiags(S, C.Callee); 746 747 // If this is a template instantiation, explore its callgraph as well: 748 // Non-dependent calls are part of the template's callgraph, while dependent 749 // calls are part of to the instantiation's call graph. 750 if (auto *Templ = C.Callee->getPrimaryTemplate()) { 751 FunctionDecl *TemplFD = Templ->getAsFunction(); 752 if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) { 753 Seen.insert(TemplFD); 754 Worklist.push_back( 755 {/* Caller = */ C.Caller, /* Callee = */ TemplFD, C.Loc}); 756 } 757 } 758 759 // Add all functions called by Callee to our worklist. 760 auto CGIt = S.CUDACallGraph.find(C.Callee); 761 if (CGIt == S.CUDACallGraph.end()) 762 continue; 763 764 for (std::pair<CanonicalDeclPtr<FunctionDecl>, SourceLocation> FDLoc : 765 CGIt->second) { 766 FunctionDecl *NewCallee = FDLoc.first; 767 SourceLocation CallLoc = FDLoc.second; 768 if (Seen.count(NewCallee) || IsKnownEmitted(S, NewCallee)) 769 continue; 770 Seen.insert(NewCallee); 771 Worklist.push_back( 772 {/* Caller = */ C.Callee, /* Callee = */ NewCallee, CallLoc}); 773 } 774 775 // C.Callee is now known-emitted, so we no longer need to maintain its list 776 // of callees in CUDACallGraph. 777 S.CUDACallGraph.erase(CGIt); 778 } 779 } 780 781 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { 782 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 783 assert(Callee && "Callee may not be null."); 784 // FIXME: Is bailing out early correct here? Should we instead assume that 785 // the caller is a global initializer? 786 FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext); 787 if (!Caller) 788 return true; 789 790 // If the caller is known-emitted, mark the callee as known-emitted. 791 // Otherwise, mark the call in our call graph so we can traverse it later. 792 bool CallerKnownEmitted = IsKnownEmitted(*this, Caller); 793 if (CallerKnownEmitted) 794 MarkKnownEmitted(*this, Caller, Callee, Loc); 795 else { 796 // If we have 797 // host fn calls kernel fn calls host+device, 798 // the HD function does not get instantiated on the host. We model this by 799 // omitting at the call to the kernel from the callgraph. This ensures 800 // that, when compiling for host, only HD functions actually called from the 801 // host get marked as known-emitted. 802 if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) 803 CUDACallGraph[Caller].insert({Callee, Loc}); 804 } 805 806 CUDADiagBuilder::Kind DiagKind = [&] { 807 switch (IdentifyCUDAPreference(Caller, Callee)) { 808 case CFP_Never: 809 return CUDADiagBuilder::K_Immediate; 810 case CFP_WrongSide: 811 assert(Caller && "WrongSide calls require a non-null caller"); 812 // If we know the caller will be emitted, we know this wrong-side call 813 // will be emitted, so it's an immediate error. Otherwise, defer the 814 // error until we know the caller is emitted. 815 return CallerKnownEmitted ? CUDADiagBuilder::K_ImmediateWithCallStack 816 : CUDADiagBuilder::K_Deferred; 817 default: 818 return CUDADiagBuilder::K_Nop; 819 } 820 }(); 821 822 if (DiagKind == CUDADiagBuilder::K_Nop) 823 return true; 824 825 // Avoid emitting this error twice for the same location. Using a hashtable 826 // like this is unfortunate, but because we must continue parsing as normal 827 // after encountering a deferred error, it's otherwise very tricky for us to 828 // ensure that we only emit this deferred error once. 829 if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second) 830 return true; 831 832 CUDADiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this) 833 << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller); 834 CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl, 835 Caller, *this) 836 << Callee; 837 return DiagKind != CUDADiagBuilder::K_Immediate && 838 DiagKind != CUDADiagBuilder::K_ImmediateWithCallStack; 839 } 840 841 void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { 842 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 843 if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>()) 844 return; 845 FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext); 846 if (!CurFn) 847 return; 848 CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); 849 if (Target == CFT_Global || Target == CFT_Device) { 850 Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 851 } else if (Target == CFT_HostDevice) { 852 Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); 853 Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); 854 } 855 } 856 857 void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, 858 const LookupResult &Previous) { 859 assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); 860 CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); 861 for (NamedDecl *OldND : Previous) { 862 FunctionDecl *OldFD = OldND->getAsFunction(); 863 if (!OldFD) 864 continue; 865 866 CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD); 867 // Don't allow HD and global functions to overload other functions with the 868 // same signature. We allow overloading based on CUDA attributes so that 869 // functions can have different implementations on the host and device, but 870 // HD/global functions "exist" in some sense on both the host and device, so 871 // should have the same implementation on both sides. 872 if (NewTarget != OldTarget && 873 ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) || 874 (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) && 875 !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false, 876 /* ConsiderCudaAttrs = */ false)) { 877 Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) 878 << NewTarget << NewFD->getDeclName() << OldTarget << OldFD; 879 Diag(OldFD->getLocation(), diag::note_previous_declaration); 880 NewFD->setInvalidDecl(); 881 break; 882 } 883 } 884 } 885 886 template <typename AttrTy> 887 static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, 888 const FunctionDecl &TemplateFD) { 889 if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) { 890 AttrTy *Clone = Attribute->clone(S.Context); 891 Clone->setInherited(true); 892 FD->addAttr(Clone); 893 } 894 } 895 896 void Sema::inheritCUDATargetAttrs(FunctionDecl *FD, 897 const FunctionTemplateDecl &TD) { 898 const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); 899 copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD); 900 copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD); 901 copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD); 902 } 903